chore: make lib backwards compatible

This commit is contained in:
2026-01-21 19:12:35 +01:00
parent ee70f02a1f
commit 0f5f8affab
3 changed files with 126 additions and 152 deletions

View File

@@ -1,7 +1,5 @@
from sys import intern
import gymnasium as gym
from gymnasium import spaces
from matplotlib import interactive
import numpy as np
from dataclasses import dataclass
import pandas as pd
@@ -15,7 +13,7 @@ class BusinessLogicConstraints():
max_price_adjustment: float = 0.30
system_max_price: float = 500.0
system_min_price: float = 1.0
product_catelogue_size: int = 100
product_catalogue_size: int = 100
episode_length: int = 200
sessions_per_step: int = 250
agent_share: float = 0.25
@@ -37,17 +35,42 @@ class BusinessLogicConstraints():
def _sigmoid(x: np.ndarray) -> np.ndarray:
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` element-wise.

    The textbook formula overflows inside ``exp`` for large negative ``x``
    and emits a RuntimeWarning. Here ``exp(-|x|)`` is evaluated instead; it
    always lies in (0, 1], and the algebraically equivalent branch is picked
    per sign, so no intermediate value can overflow.

    Args:
        x: Scalar or array-like of real values.

    Returns:
        Sigmoid of ``x`` with the same shape as the input (a scalar for
        scalar input).
    """
    x = np.asarray(x)
    decay = np.exp(-np.abs(x))  # in (0, 1]; cannot overflow
    # x >= 0: 1 / (1 + e^-x)    x < 0: e^x / (1 + e^x)  -- the same function.
    out = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with an empty tuple unwraps a 0-d result to a scalar and is a
    # no-op for arrays of any higher rank.
    return out[()]
class BehavioralProfile:
    """Simple Markov-chain model for generating synthetic interaction events.

    A session starts in ``view`` and follows the transition table in
    ``self.trans`` until it reaches ``exit``. Each call to :meth:`sample`
    simulates one such session and summarises it as a single event.

    Attributes:
        actor: Population label passed by the caller; ``'agents'`` selects the
            agent transition table, any other value uses the default table.
        purchase_probs: Per-product array; only its length is used here, to
            pick a product index uniformly at random.
        states: Funnel stages ordered from shallowest to deepest.
        trans: Mapping ``state -> {next_state: probability}``.
    """

    # Safety cap on transitions per session. Every non-terminal state in the
    # built-in tables exits with probability 0.1, so this is never reached in
    # practice; it only guards against a user-supplied table without an exit.
    _MAX_STEPS = 1000

    def __init__(self, actor: str, purchase_probs: np.ndarray):
        self.actor = actor
        self.purchase_probs = purchase_probs
        self.states = ['view', 'cart', 'checkout']
        self.trans = {
            'view': {'view': 0.6, 'cart': 0.3, 'exit': 0.1},
            'cart': {'checkout': 0.5, 'view': 0.4, 'exit': 0.1},
            'checkout': {'exit': 1.0},
        }
        if actor == 'agents':  # agents browse more before purchasing
            self.trans['view'] = {'view': 0.75, 'cart': 0.15, 'exit': 0.1}
            self.trans['cart'] = {'checkout': 0.3, 'view': 0.6, 'exit': 0.1}

    def sample(self, rng: np.random.Generator) -> Dict[str, Any]:
        """Sample a single interaction event.

        The chain is walked from ``view`` until ``exit``. A single transition
        out of ``view`` can never land on ``checkout``, so stopping after one
        step would make every event a zero-price view; walking the whole
        session makes the purchase branch reachable.

        Args:
            rng: NumPy random generator used for every draw.

        Returns:
            Dict with keys ``action`` (deepest funnel stage reached),
            ``product_idx``, ``actor`` (``'agent'`` or ``'human'``), ``t``
            (always ``0.0``) and ``price_paid`` (uniform in [50, 200) when the
            session reached checkout, otherwise ``0.0``).

        Raises:
            ValueError: If ``purchase_probs`` is empty (raised by
                ``rng.integers``).
        """
        product_idx = rng.integers(0, len(self.purchase_probs))
        depth = {name: i for i, name in enumerate(self.states)}

        state = 'view'  # always start with view
        action = state
        for _ in range(self._MAX_STEPS):
            options = self.trans.get(state, {'exit': 1.0})
            state = str(rng.choice(list(options.keys()), p=list(options.values())))
            if state == 'exit':
                break
            # Remember the furthest stage; a cart -> view bounce must not
            # erase the fact that the session reached the cart.
            if depth.get(state, -1) > depth.get(action, -1):
                action = state

        price_paid = float(rng.uniform(50, 200)) if action == 'checkout' else 0.0
        return {
            'action': action,
            'product_idx': product_idx,
            'actor': 'agent' if self.actor == 'agents' else 'human',
            't': 0.0,
            'price_paid': price_paid,
        }
def _load_behavioral_profile(actor: str, demand_forcing: np.ndarray) -> BehavioralProfile:
    """Build the behavioral profile used to generate synthetic sessions.

    Args:
        actor: Population label, ``'humans'`` or ``'agents'``.
        demand_forcing: Per-product purchase probabilities, stored on the
            profile as ``purchase_probs``.

    Returns:
        A new ``BehavioralProfile`` for the given actor.
    """
    profile = BehavioralProfile(actor=actor, purchase_probs=demand_forcing)
    return profile
class CommercePlatform:
"""
This is just an extension of the state management for the environment, it does not implement anything dynamic just helps us simulate demand.
"""
def __init__(self,
product_catelogue_size: int,
max_price: float,
min_price: float,
constraints: BusinessLogicConstraints):
self.product_catelogue_size = product_catelogue_size
self.product_supply = np.random.uniform(low=10, high=50, size=(self.product_catelogue_size,))
"""state management for the environment, simulates demand"""
def __init__(self, product_catalogue_size: int, max_price: float, min_price: float, constraints: BusinessLogicConstraints):
self.product_catalogue_size = product_catalogue_size
self.product_supply = np.random.uniform(low=10, high=50, size=(self.product_catalogue_size,))
self.max_price = max_price
self.min_price = min_price
self.constraints = constraints
@@ -55,27 +78,12 @@ class CommercePlatform:
self._rng = np.random.default_rng(constraints.seed)
self._last_interaction_df: pd.DataFrame = pd.DataFrame()
def setup_true_demand(self, prices: np.ndarray) -> Dict[str, np.ndarray]:
# ground truth purchase propensities
p = np.clip(prices, self.min_price, self.max_price)
pn = p / self.max_price
human_prob = self.constraints.base_human_demand * (pn ** self.constraints.human_price_elasticity)
agent_prob = self.constraints.base_agent_demand * (pn ** self.constraints.agent_price_elasticity)
return {
"human_purchase_prob": np.clip(human_prob, 0.0, 0.95),
"agent_purchase_prob": np.clip(agent_prob, 0.0, 0.95)
}
def _load_behavioral_profile(actor : str, demand_forcing):
"""
This returns a markov chain with average weights which we get from interaction data of our experiments.
This defines transition probabilities between different events:
search -> view_item_price_binN: 0.7
view_item_price_binN -> add_to_cart: 0.2
we also must reweight with the demand_forcing vector or purchase probabilities per-product
"""
return {"human_purchase_prob": np.clip(human_prob, 0.0, 0.95), "agent_purchase_prob": np.clip(agent_prob, 0.0, 0.95)}
def _simulate_sessions(self, base_prices: np.ndarray) -> pd.DataFrame:
demand = self.setup_true_demand(base_prices)
@@ -162,22 +170,22 @@ class PHANTOMEnv(gym.Env):
self.constraints = BusinessLogicConstraints()
self.action_space = spaces.Box(low=-self.constraints.max_price_adjustment,
high=self.constraints.max_price_adjustment,
shape=(self.constraints.product_catelogue_size,), dtype=np.float32)
shape=(self.constraints.product_catalogue_size,), dtype=np.float32)
self.observation_space = spaces.Dict({
"elasticity": spaces.Dict({
"price": spaces.Box(
low=np.full((self.constraints.product_catelogue_size,), self.constraints.system_min_price, dtype=np.float32),
high=np.full((self.constraints.product_catelogue_size,), self.constraints.system_max_price, dtype=np.float32),
low=np.full((self.constraints.product_catalogue_size,), self.constraints.system_min_price, dtype=np.float32),
high=np.full((self.constraints.product_catalogue_size,), self.constraints.system_max_price, dtype=np.float32),
dtype=np.float32),
"demand": spaces.Box(
low=np.zeros((self.constraints.product_catelogue_size,), dtype=np.float32),
high=np.full((self.constraints.product_catelogue_size,), 1e6, dtype=np.float32),
low=np.zeros((self.constraints.product_catalogue_size,), dtype=np.float32),
high=np.full((self.constraints.product_catalogue_size,), 1e6, dtype=np.float32),
dtype=np.float32),
})
# TODO: define more features that we compute from the interaction data
})
self.commerce_platform = CommercePlatform(
product_catelogue_size=self.constraints.product_catelogue_size,
product_catalogue_size=self.constraints.product_catalogue_size,
max_price=self.constraints.system_max_price,
min_price=self.constraints.system_min_price,
constraints=self.constraints)
@@ -192,12 +200,12 @@ class PHANTOMEnv(gym.Env):
self._rng = np.random.default_rng(seed)
self.commerce_platform._rng = np.random.default_rng(seed)
self.t = 0
init_prices = self._rng.uniform(low=60.0, high=140.0, size=(self.constraints.product_catelogue_size,)).astype(np.float32)
init_prices = self._rng.uniform(low=60.0, high=140.0, size=(self.constraints.product_catalogue_size,)).astype(np.float32)
self._prev_prices = init_prices.copy()
self.state = {
"elasticity": {
"price": init_prices,
"demand": np.zeros((self.constraints.product_catelogue_size,), dtype=np.float32),
"demand": np.zeros((self.constraints.product_catalogue_size,), dtype=np.float32),
}
}
return self.state, {}
@@ -210,38 +218,35 @@ class PHANTOMEnv(gym.Env):
self.constraints.system_max_price).astype(np.float32)
self.state["elasticity"]["price"] = new_prices
# TODO: use the commerce platform to simulate sessions
interactions_df = self.commerce_platform._simulate_sessions(new_prices)
result = self.commerce_platform.compute_interaction_features(interactions_df)
# TODO: implement COI computation to use in reward
COI = 0.0
COI = 0.0 # TODO: implement cost-of-information computation
volatility = 0.0 if self._prev_prices is None else \
float(np.mean(np.abs((new_prices - self._prev_prices) / (self._prev_prices + 1e-6))))
self._prev_prices = new_prices.copy()
revenue_observed = float(result["revenue_observed"])
agent_loss = float(result["agent_loss"])
# extract metrics with safe defaults for incomplete simulation
revenue_observed = float(result.get("revenue_observed", result.get("mean_sale_price", 0.0)))
agent_loss = float(result.get("agent_loss", 0.0))
reward = (revenue_observed
- COI
- self.constraints.w_agent_loss * agent_loss
- self.constraints.w_volatility * volatility
- self.constraints.w_estimation_error
)
- self.constraints.w_estimation_error)
terminated = self.t >= self.constraints.episode_length
info = {
"t": self.t,
"revenue_observed": revenue_observed,
"revenue_oracle": float(result["revenue_oracle"]),
"revenue_oracle": float(result.get("revenue_oracle", revenue_observed)),
"agent_loss": agent_loss,
"ux_volatility": volatility,
"mean_internal_error": err_mean,
"look_to_book": float(result["interaction_features"].get("look_to_book", 0.0)),
"mean_sale_price": float(result["interaction_features"].get("mean_sale_price", 0.0)),
"true_human_purchases_total": float(np.sum(result["true_human_demand"])),
"true_agent_purchases_total": float(np.sum(result["true_agent_purchases"])),
"look_to_book": float(result.get("look_to_book", 0.0)),
"mean_sale_price": float(result.get("mean_sale_price", 0.0)),
"true_human_purchases_total": 0.0, # TODO: track from simulation
"true_agent_purchases_total": 0.0, # TODO: track from simulation
}
return self.state, float(reward), terminated, False, info
@@ -250,46 +255,43 @@ if __name__ == "__main__":
import matplotlib.pyplot as plt
from collections import defaultdict
runs = {}
for use_defense in (False, True):
env = PHANTOMEnv(use_defense=use_defense)
obs, _ = env.reset(seed=42)
metrics = defaultdict(list)
total_reward = 0.0
done = False
env = PHANTOMEnv(constraints=BusinessLogicConstraints())
obs, _ = env.reset(seed=42)
metrics = defaultdict(list)
total_reward = 0.0
done = False
while not done:
action = env.action_space.sample()
obs, reward, done, _, info = env.step(action)
total_reward += reward
p_mean = float(np.mean(obs["elasticity"]["price"]))
q_mean = float(np.mean(obs["elasticity"]["demand"]))
p_std = float(np.std(obs["elasticity"]["price"]))
while not done:
action = env.action_space.sample()
obs, reward, done, _, info = env.step(action)
total_reward += reward
p_mean = float(np.mean(obs["elasticity"]["price"]))
q_mean = float(np.mean(obs["elasticity"]["demand"]))
p_std = float(np.std(obs["elasticity"]["price"]))
metrics['t'].append(info['t'])
metrics['price_mean'].append(p_mean)
metrics['price_std'].append(p_std)
metrics['demand_mean'].append(q_mean)
metrics['revenue_observed'].append(info['revenue_observed'])
metrics['revenue_oracle'].append(info['revenue_oracle'])
metrics['agent_loss'].append(info['agent_loss'])
metrics['ux_volatility'].append(info['ux_volatility'])
metrics['look_to_book'].append(info['look_to_book'])
metrics['reward'].append(reward)
metrics['human_purchases'].append(info['true_human_purchases_total'])
metrics['agent_purchases'].append(info['true_agent_purchases_total'])
metrics['t'].append(info['t'])
metrics['price_mean'].append(p_mean)
metrics['price_std'].append(p_std)
metrics['demand_mean'].append(q_mean)
metrics['revenue_observed'].append(info['revenue_observed'])
metrics['revenue_oracle'].append(info['revenue_oracle'])
metrics['agent_loss'].append(info['agent_loss'])
metrics['ux_volatility'].append(info['ux_volatility'])
metrics['look_to_book'].append(info['look_to_book'])
metrics['reward'].append(reward)
metrics['human_purchases'].append(info['true_human_purchases_total'])
metrics['agent_purchases'].append(info['true_agent_purchases_total'])
if info['t'] % 20 == 0 or done:
print(f"defense={'ON ' if use_defense else 'OFF'} t={info['t']:03d} p={p_mean:6.2f}±{p_std:4.2f} "
f"q={q_mean:6.2f} rev={info['revenue_observed']:7.2f} oracle={info['revenue_oracle']:7.2f} "
f"loss={info['agent_loss']:6.2f} ux={info['ux_volatility']:.3f} "
f"ltb={info['look_to_book']:5.2f} r={reward:7.2f}")
if info['t'] % 20 == 0 or done:
print(f"t={info['t']:03d} p={p_mean:6.2f}±{p_std:4.2f} q={q_mean:6.2f} "
f"rev={info['revenue_observed']:7.2f} oracle={info['revenue_oracle']:7.2f} "
f"loss={info['agent_loss']:6.2f} ux={info['ux_volatility']:.3f} "
f"ltb={info['look_to_book']:5.2f} r={reward:7.2f}")
runs[use_defense] = metrics
print(f"defense={'ON ' if use_defense else 'OFF'} total_reward={total_reward:.2f}\n")
print(f"total_reward={total_reward:.2f}")
fig, axes = plt.subplots(3, 3, figsize=(15, 12))
fig.suptitle('PHANTOM Environment: Defense OFF vs ON', fontsize=14, fontweight='bold')
fig.suptitle('PHANTOM Environment Run', fontsize=14, fontweight='bold')
plot_configs = [
('price_mean', 'Mean Price', 'Price'),
@@ -305,13 +307,10 @@ if __name__ == "__main__":
for idx, (key, title, ylabel) in enumerate(plot_configs):
ax = axes[idx // 3, idx % 3]
for use_defense, label, color in [(False, 'No Defense', 'red'), (True, 'With Defense', 'blue')]:
m = runs[use_defense]
ax.plot(m['t'], m[key], label=label, color=color, alpha=0.7, linewidth=1.5)
ax.plot(metrics['t'], metrics[key], color='blue', alpha=0.7, linewidth=1.5)
ax.set_xlabel('Step')
ax.set_ylabel(ylabel)
ax.set_title(title, fontsize=10, fontweight='bold')
ax.legend(loc='best', fontsize=8)
ax.grid(True, alpha=0.3)
plt.tight_layout()