naoice COI implementation

This commit is contained in:
2026-02-02 11:18:37 +01:00
parent 4abef97bf7
commit c4fd1352c9
5 changed files with 221 additions and 68 deletions

View File

@@ -3,20 +3,23 @@ import numpy as np
from .lib.demand import generate_demand_for_actor, estimate_demand from .lib.demand import generate_demand_for_actor, estimate_demand
from .lib.behavior import sample_behavior from .lib.behavior import sample_behavior
from logging import INFO, getLogger from logging import INFO, getLogger
logger = getLogger(__name__) logger = getLogger(__name__)
logger.setLevel(INFO) logger.setLevel(INFO)
class MarketEngine(): class MarketEngine:
"""implements separate demand distributions for humans and agents per Section 3.1.1""" """implements separate demand distributions for humans and agents per Section 3.1.1"""
def __init__(self, def __init__(
alpha: float, self,
N: int, alpha: float,
human_params: tuple, N: int,
agent_params: tuple, human_params: tuple,
demand_distribution = np.random.normal, agent_params: tuple,
noise_std: float = 1.0): demand_distribution=np.random.normal,
noise_std: float = 1.0,
):
# no defaults for D_H, D_A - force explicit experiment design # no defaults for D_H, D_A - force explicit experiment design
self.alpha = alpha self.alpha = alpha
self.Nagents = int(N * alpha) self.Nagents = int(N * alpha)
@@ -28,31 +31,41 @@ class MarketEngine():
def act(self, prices):
    """Simulate the market's response to ``prices`` and return estimated demand.

    One demand is generated for humans and one for agents (humans first),
    then one behavior trajectory is sampled per human followed by one per
    agent. The combined list is kept on ``self.last_trajectories`` so callers
    can inspect the sessions behind the returned demand estimate.
    """
    # separate demand d() per actor type, drawn in human -> agent order
    human_demand, agent_demand = (
        generate_demand_for_actor(
            prices,
            actor_params,
            self.noise_std,
            distribution_method=self.demand_dist,
        )
        for actor_params in (self.human_params, self.agent_params)
    )

    # behavior trajectories: all humans first, then all agents
    trajectories = [
        sample_behavior(human_demand, human=True) for _ in range(self.Nhumans)
    ]
    trajectories += [
        sample_behavior(agent_demand, human=False) for _ in range(self.Nagents)
    ]

    # stored for agent probability calculation by the caller
    self.last_trajectories = trajectories
    return estimate_demand(trajectories)
def measure(self):
    """Placeholder hook: currently does nothing and returns None."""
    pass
class PricingEngine:
    """Stub platform that proposes uniformly random prices.

    The sampling range and vector length used to be hard-coded in ``act``;
    they are now constructor parameters whose defaults reproduce the previous
    behavior, so ``PricingEngine()`` is unchanged for existing callers.

    args:
        low: lower bound of the sampled prices (default 25)
        high: upper bound of the sampled prices (default 100)
        n_products: number of prices returned per call (default 10)
    """

    def __init__(
        self,
        low: float = 25,
        high: float = 100,
        n_products: int = 10,
    ) -> None:
        self.low = low
        self.high = high
        self.n_products = n_products

    def act(self, demand):
        """Return a fresh random price vector; ``demand`` is accepted but unused."""
        return np.random.uniform(low=self.low, high=self.high, size=self.n_products)
class Limbo:
class Limbo(): def __init__(self, platform, market) -> None:
def __init__(self,
platform,
market
) -> None:
self.platform_turn = True self.platform_turn = True
self.platform = platform self.platform = platform
self.market = market self.market = market
@@ -67,9 +80,12 @@ class Limbo():
print(self.output) print(self.output)
self.platform_turn = not self.platform_turn self.platform_turn = not self.platform_turn
if __name__ == "__main__": if __name__ == "__main__":
platform = PricingEngine() platform = PricingEngine()
market = MarketEngine(alpha=0.3, N=100, human_params=(50, 10), agent_params=(45, 15)) market = MarketEngine(
alpha=0.3, N=100, human_params=(50, 10), agent_params=(45, 15)
)
limbo = Limbo(platform, market) limbo = Limbo(platform, market)
for _ in range(10): for _ in range(10):
limbo.step() limbo.step()

View File

@@ -1,6 +1,7 @@
from .demand import estimate_demand, generate_demand_for_actor from .demand import estimate_demand, generate_demand_for_actor
from .behavior import sample_behavior from .behavior import sample_behavior, get_transition_models, trajectory_to_events
from .render import DashboardRenderer, style_axis from .render import DashboardRenderer, style_axis
from .wrappers import EconomicMetricsWrapper from .wrappers import EconomicMetricsWrapper
from .callbacks import MetricsCallback, EvalMetricsCallback from .callbacks import MetricsCallback, EvalMetricsCallback
from .providers import ProviderBenchmark, ProviderResult, BenchmarkConfig from .providers import ProviderBenchmark, ProviderResult, BenchmarkConfig
from .coi import compute_coi_leakage, compute_erosion_metrics, compute_agent_probability

View File

@@ -1,3 +1,8 @@
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parents[2]))
from sim.rl.behavior_loader.models import ( from sim.rl.behavior_loader.models import (
BehaviorModel, BehaviorModel,
AgentBehaviorModel, AgentBehaviorModel,
@@ -7,11 +12,9 @@ import pandas as pd
import numpy as np import numpy as np
from .demand import generate_demand_for_actor from .demand import generate_demand_for_actor
base_dir = "/home/velocitatem/Documents/Projects/PHANTOM/experiments" base_dir = Path(__file__).parents[2] / "experiments"
human_dir, agent_dir = ( human_dir = str(base_dir / "collected_data")
f"{base_dir}/collected_data/", agent_dir = str(base_dir / "agents" / "collected_data")
f"{base_dir}/agents/collected_data/",
)
_cache = {} # lazy cache for models and base pivots _cache = {} # lazy cache for models and base pivots
@@ -25,6 +28,46 @@ def _get_base_pivot(human: bool):
return _cache[key] return _cache[key]
def get_transition_models():
    """Build the human and agent transition tables used for agent probability.

    A BehaviorModel is created from ``human_dir`` and an AgentBehaviorModel
    from ``agent_dir``; each model's MDP is built and then passed through
    ``aggregate_event_transitions``.

    returns:
        tuple: (human_transitions, agent_transitions), described by the
            original author as dicts of event->event->prob
    """
    # construct both models before building either MDP (same order as before)
    humans, agents = BehaviorModel(human_dir), AgentBehaviorModel(agent_dir)
    humans_mdp, agents_mdp = humans.build_MDP(), agents.build_MDP()
    return (
        aggregate_event_transitions(humans_mdp),
        aggregate_event_transitions(agents_mdp),
    )
def trajectory_to_events(trajectory: list) -> list:
    """Strip the product suffix from each state of a trajectory.

    States look like 'eventName_productX'. Everything from the LAST
    occurrence of '_product' onward is dropped, so event names that
    themselves contain '_product' keep their leading part intact. States
    without the marker are returned unchanged.

    args:
        trajectory: list like ['view_product0', 'add_to_cart_product1', 'checkout_product1']

    returns:
        list: event names like ['view', 'add_to_cart', 'checkout'], in the
            same order and of the same length as the input
    """
    marker = "_product"
    return [
        # rsplit with maxsplit=1 cuts only at the final marker occurrence
        state.rsplit(marker, 1)[0] if marker in state else state
        for state in trajectory
    ]
def adjust_behavior_to_condition(condition, transition_matrix): def adjust_behavior_to_condition(condition, transition_matrix):
# expand NxN transition matrix to (N*P)x(N*P) weighted by demand condition # expand NxN transition matrix to (N*P)x(N*P) weighted by demand condition
cond_norm = condition / np.sum(condition) cond_norm = condition / np.sum(condition)

View File

@@ -3,30 +3,42 @@ from gymnasium import spaces
import numpy as np import numpy as np
from .engine import Limbo, MarketEngine, PricingEngine from .engine import Limbo, MarketEngine, PricingEngine
from .lib.render import DashboardRenderer from .lib.render import DashboardRenderer
from .lib.coi import compute_coi_proxy from .lib.coi import (
compute_coi_leakage,
compute_erosion_metrics,
compute_agent_probability,
)
from .lib.behavior import get_transition_models, trajectory_to_events
from .lib.wrappers import EconomicMetricsWrapper from .lib.wrappers import EconomicMetricsWrapper
class PHANTOM(gym.Env): class PHANTOM(gym.Env):
"""Gymnasium wrapper for the Limbo pricing-market simulation. Platform sets prices, market responds with demand.""" """Gymnasium wrapper for Limbo pricing-market simulation implementing thesis COI framework
reward = R(p,d) - λ·COI_leak(p,τ') per thesis Section on DR-RL
COI_leak uses behavioral divergence to estimate agent probability f(τ')
"""
metadata = {"render_modes": ["human", "ansi"]} metadata = {"render_modes": ["human", "ansi"]}
def __init__(self, def __init__(
n_products: int = 10, self,
alpha: float = 0.3, n_products: int = 10,
N: int = 100, alpha: float = 0.3,
human_params: tuple = (50.0, 10.0), N: int = 100,
agent_params: tuple = (45.0, 15.0), human_params: tuple = (50.0, 10.0),
noise_std: float = 1.0, agent_params: tuple = (45.0, 15.0),
price_bounds: tuple = (10.0, 150.0), noise_std: float = 1.0,
lambda_coi: float = 0.1, price_bounds: tuple = (10.0, 150.0),
coi_window: int = 10, lambda_coi: float = 0.1,
render_mode: str = None): coi_window: int = 10,
render_mode: str = None,
):
super().__init__() super().__init__()
self.n_products = n_products self.n_products = n_products
self.price_bounds = price_bounds self.price_bounds = price_bounds
self.lambda_coi = lambda_coi self.lambda_coi = lambda_coi
self.coi_window = coi_window # K steps for rolling COI calculation self.coi_window = coi_window
self.render_mode = render_mode self.render_mode = render_mode
self.alpha = alpha self.alpha = alpha
self.N = N self.N = N
@@ -34,20 +46,34 @@ class PHANTOM(gym.Env):
self.agent_params = agent_params self.agent_params = agent_params
self.market = MarketEngine( self.market = MarketEngine(
alpha=alpha, N=N, alpha=alpha,
human_params=human_params, agent_params=agent_params, noise_std=noise_std N=N,
human_params=human_params,
agent_params=agent_params,
noise_std=noise_std,
) )
self._platform_stub = PricingEngine() self._platform_stub = PricingEngine()
self._limbo = Limbo(self._platform_stub, self.market) self._limbo = Limbo(self._platform_stub, self.market)
self.action_space = spaces.Box( self.action_space = spaces.Box(
low=price_bounds[0], high=price_bounds[1], low=price_bounds[0],
shape=(n_products,), dtype=np.float32 high=price_bounds[1],
shape=(n_products,),
dtype=np.float32,
)
self.observation_space = spaces.Dict(
{
"demand": spaces.Box(
low=0.0, high=100.0, shape=(n_products,), dtype=np.float32
),
"prices": spaces.Box(
low=price_bounds[0],
high=price_bounds[1],
shape=(n_products,),
dtype=np.float32,
),
}
) )
self.observation_space = spaces.Dict({
"demand": spaces.Box(low=0.0, high=100.0, shape=(n_products,), dtype=np.float32),
"prices": spaces.Box(low=price_bounds[0], high=price_bounds[1], shape=(n_products,), dtype=np.float32),
})
self._prices = None self._prices = None
self._demand = None self._demand = None
@@ -56,25 +82,61 @@ class PHANTOM(gym.Env):
self._price_history = [] self._price_history = []
self._revenue_history = [] self._revenue_history = []
self._renderer = None self._renderer = None
self._initial_episode_prices = None # prices at episode start for COI calc self._initial_episode_prices = None
self._trajectories = [] # session trajectories for agent prob calculation
# load behavioral models for agent probability estimation
try:
self._human_trans, self._agent_trans = get_transition_models()
except Exception:
# fallback if behavioral data unavailable
self._human_trans, self._agent_trans = None, None
def _get_obs(self) -> dict:
    """Assemble the observation dict with float32 'demand' and 'prices' arrays.

    Demand is read per product index from ``self._demand``; indices that are
    missing contribute 0.0.
    """
    demand_values = [self._demand.get(idx, 0.0) for idx in range(self.n_products)]
    observation = {
        "demand": np.array(demand_values, dtype=np.float32),
        "prices": self._prices.astype(np.float32),
    }
    return observation
def _compute_agent_prob(self) -> float:
    """Estimate the agent probability from the trajectories gathered so far.

    Falls back to ``self.alpha`` (the contamination level) when no
    trajectories are stored, when either transition model is unavailable, or
    when fewer than two events can be extracted. Otherwise the flattened
    event sequence is scored by ``compute_agent_probability``.
    """
    models_missing = self._human_trans is None or self._agent_trans is None
    if models_missing or not self._trajectories:
        return self.alpha  # fallback to contamination level

    # flatten every stored trajectory into one event sequence
    events = [
        event
        for trajectory in self._trajectories
        for event in trajectory_to_events(trajectory)
    ]
    if len(events) < 2:
        return self.alpha

    return compute_agent_probability(events, self._human_trans, self._agent_trans)
def _compute_reward(self, prices: np.ndarray, demand: dict) -> float:
    """Return revenue minus the lambda-weighted COI leakage penalty.

    Revenue is the sum over products of price times demand, with missing
    demand entries counted as 0.0. Leakage comes from ``compute_coi_leakage``
    evaluated at the current agent probability estimate.
    """
    quantities = np.array([demand.get(idx, 0.0) for idx in range(self.n_products)])
    revenue = np.sum(prices * quantities)

    # agent probability is derived from the behavioral trajectories
    leakage = compute_coi_leakage(prices, self._compute_agent_prob())
    return float(revenue - self.lambda_coi * leakage)
def _record_history(self): def _record_history(self):
demand_arr = np.array([self._demand.get(i, 0.0) for i in range(self.n_products)]) demand_arr = np.array(
[self._demand.get(i, 0.0) for i in range(self.n_products)]
)
self._demand_history.append(demand_arr) self._demand_history.append(demand_arr)
self._price_history.append(self._prices.copy()) self._price_history.append(self._prices.copy())
self._revenue_history.append(np.sum(self._prices * demand_arr)) self._revenue_history.append(np.sum(self._prices * demand_arr))
@@ -82,10 +144,11 @@ class PHANTOM(gym.Env):
def reset(self, seed=None, options=None):
    """Start a new episode and return ``(observation, info)``.

    Initial prices are drawn uniformly within ``self.price_bounds`` from the
    environment's own generator ``self.np_random``, which
    ``super().reset(seed=seed)`` seeds. Previously they came from the global
    ``np.random`` state, so passing ``seed`` did not make them reproducible.

    args:
        seed: optional seed forwarded to the base class reset
        options: accepted for API compatibility; not used here

    returns:
        tuple: (observation dict from ``_get_obs``, empty info dict)
    """
    super().reset(seed=seed)
    # use the env-owned generator so the seed controls the starting prices
    self._prices = self.np_random.uniform(*self.price_bounds, size=self.n_products)
    self._initial_episode_prices = self._prices.copy()
    # NOTE(review): the market's own sampling happens outside this method;
    # whether it honours the seed cannot be confirmed from here
    self._demand = self.market.act(self._prices)
    self._step_count = 0
    self._demand_history, self._price_history, self._revenue_history = [], [], []
    self._trajectories = []
    self._record_history()
    return self._get_obs(), {}
@@ -95,15 +158,36 @@ class PHANTOM(gym.Env):
self._step_count += 1 self._step_count += 1
self._record_history() self._record_history()
coi_proxy = self._compute_coi_proxy() # capture trajectories generated by market for agent prob estimation
if hasattr(self.market, "last_trajectories"):
self._trajectories.extend(self.market.last_trajectories)
agent_prob = self._compute_agent_prob()
coi_leakage = compute_coi_leakage(self._prices, agent_prob)
reward = self._compute_reward(self._prices, self._demand) reward = self._compute_reward(self._prices, self._demand)
terminated = self._step_count >= 100 terminated = self._step_count >= 100
# legacy erosion metrics for comparison
erosion = compute_erosion_metrics(
self._price_history,
self._demand_history,
self._initial_episode_prices,
self._prices,
self.price_bounds,
self.alpha,
self.coi_window,
)
info = { info = {
"step": self._step_count, "step": self._step_count,
"coi_proxy": coi_proxy, "agent_prob": agent_prob,
"coi_penalty": self.lambda_coi * coi_proxy, "coi_leakage": coi_leakage,
"raw_revenue": np.sum(self._prices * np.array([self._demand.get(i, 0.0) for i in range(self.n_products)])), "coi_penalty": self.lambda_coi * coi_leakage,
"erosion_metrics": erosion,
"raw_revenue": np.sum(
self._prices
* np.array([self._demand.get(i, 0.0) for i in range(self.n_products)])
),
} }
return self._get_obs(), reward, terminated, False, info return self._get_obs(), reward, terminated, False, info
@@ -114,10 +198,16 @@ class PHANTOM(gym.Env):
p, q = np.array(self._price_history), np.array(self._demand_history) p, q = np.array(self._price_history), np.array(self._demand_history)
dp, dq = np.diff(p, axis=0), np.diff(q, axis=0) dp, dq = np.diff(p, axis=0), np.diff(q, axis=0)
valid = np.abs(dp) > 0.5 valid = np.abs(dp) > 0.5
with np.errstate(divide='ignore', invalid='ignore'): with np.errstate(divide="ignore", invalid="ignore"):
elasticity = np.where(valid, (dq / dp) * (p[:-1] / np.maximum(q[:-1], 1.0)), 0.0) elasticity = np.where(
valid, (dq / dp) * (p[:-1] / np.maximum(q[:-1], 1.0)), 0.0
)
elasticity = np.nan_to_num(np.clip(elasticity, -5.0, 5.0), nan=0.0) elasticity = np.nan_to_num(np.clip(elasticity, -5.0, 5.0), nan=0.0)
return np.mean(elasticity, axis=0) if len(elasticity) > 0 else np.zeros(self.n_products) return (
np.mean(elasticity, axis=0)
if len(elasticity) > 0
else np.zeros(self.n_products)
)
def render(self): def render(self):
if self.render_mode == "human": if self.render_mode == "human":
@@ -125,7 +215,9 @@ class PHANTOM(gym.Env):
self._renderer = DashboardRenderer() self._renderer = DashboardRenderer()
self._renderer.render(self) self._renderer.render(self)
elif self.render_mode == "ansi": elif self.render_mode == "ansi":
return f"step={self._step_count}, prices={self._prices}, demand={self._demand}" return (
f"step={self._step_count}, prices={self._prices}, demand={self._demand}"
)
return None return None
def close(self): def close(self):
@@ -140,6 +232,7 @@ if __name__ == "__main__":
class RandomPolicy: class RandomPolicy:
"""Minimal SB3-compatible random policy for baseline testing.""" """Minimal SB3-compatible random policy for baseline testing."""
def __init__(self, env): def __init__(self, env):
self.env = env self.env = env
self.num_timesteps = 0 self.num_timesteps = 0

View File

@@ -27,7 +27,7 @@ These behavioral signals serve as inputs for a Distributionally Robust Reinforce
\noindent\textbf{Keywords:} Dynamic Pricing, LLM Agents, Adversarial Machine Learning, E-commerce, Behavioral Detection, Reinforcement Learning \noindent\textbf{Keywords:} Dynamic Pricing, LLM Agents, Adversarial Machine Learning, E-commerce, Behavioral Detection, Reinforcement Learning
\vspace{1em} \vspace{1em}
\noindent\textbf{Acknowledgments:} Eugene Bykovets, PhD - ETH for helping with problem formulation. This research was supported by the TPU Research Cloud program. \noindent\textbf{Acknowledgments:} This research was supported by the TPU Research Cloud program.
\clearpage \clearpage
\input{chapters/01-intro} \input{chapters/01-intro}