Merge pull request #44 from velocitatem/agent-behavior-loader-developemen

Agent behavior loader development + RL loop definition and e2e tests.
This commit is contained in:
Daniel Alves Rösel
2026-01-31 10:21:54 +01:00
committed by GitHub
71 changed files with 5073 additions and 751 deletions

30
.gitignore vendored
View File

@@ -5,18 +5,28 @@
**/.virtual_documents/ **/.virtual_documents/
**/session_*.svg **/session_*.svg
**/*graph.svg **/*graph.svg
paper/src/bib/auto **/auto/*.el
*.old
**/package-lock.json
**/*.parquet
**/_build/
# Airflow logs - exclude DAG run logs paper/src/bib/auto
=======
**/_build/
paper/src/auto/*
paper/src/bib/auto
docs/goals/*.md
PHANTOM.wiki/
experiments/airflow/logs/* experiments/airflow/logs/*
experiments/airflow/logs/scheduler/ experiments/airflow/logs/scheduler/
experiments/airflow/logs/dag_processor_manager/ experiments/airflow/logs/dag_processor_manager/
experiments/collected_data/* experiments/collected_data/
experiments/agents/collected_data/
paper/src/auto/* sim/rl/behavior_loader/*.dot
lib/ sim/rl/behavior_loader/*.png
docs/goals/*.md sim/rl/behavior_loader/*.svg
PHANTOM.wiki/ sim/rl/behavior_loader/*.pdf
tests/e2e/node_modules/** tests/e2e/node_modules/**
**/auto/*.el lab/case/thesis/runs*/
*.old sim/case/thesis_simplified/runs*/

View File

@@ -49,8 +49,10 @@ test.backend: $(VENV)
test.e2e: test.e2e:
@cd tests/e2e && npm install @cd tests/e2e && npm install
@cd tests/e2e && npx playwright install chromium @cd tests/e2e && npx playwright install chromium
@test -f tests/e2e/.env || cp tests/e2e/.env.example tests/e2e/.env
@timeout 30 bash -c 'until curl -sf http://localhost:5000/health > /dev/null 2>&1; do sleep 1; done' || (echo "Backend not ready" && exit 1) @timeout 30 bash -c 'until curl -sf http://localhost:5000/health > /dev/null 2>&1; do sleep 1; done' || (echo "Backend not ready" && exit 1)
@timeout 30 bash -c 'until curl -sf http://localhost:3000 > /dev/null 2>&1; do sleep 1; done' || (echo "Web app not ready" && exit 1) @timeout 30 bash -c 'until curl -sf http://localhost:3000 > /dev/null 2>&1; do sleep 1; done' || (echo "Web app not ready" && exit 1)
@timeout 30 bash -c 'until curl -sf http://localhost:8085/health > /dev/null 2>&1; do sleep 1; done' || (echo "Airflow not ready" && exit 1)
@cd tests/e2e && npm test @cd tests/e2e && npm test
.PHONY: test.all .PHONY: test.all

View File

@@ -47,53 +47,52 @@ def health() -> dict:
@app.get("/api/{mode}/price/{productId}", response_model=PriceResponse) @app.get("/api/{mode}/price/{productId}", response_model=PriceResponse)
def get_price(mode: Literal['hotel', 'airline'], productId: str, sessionId: Optional[str] = Query(None), experimentId: Optional[str] = Query(None)): def get_price(mode: Literal['hotel', 'airline'], productId: str, sessionId: Optional[str] = Query(None), experimentId: Optional[str] = Query(None)):
"""
THIS is the fast lookup service (mechanism).
Priority: session-keyed price > global optimal price > base price
"""
product = supabase.table(f'{mode}_products').select("metadata").eq('id', productId).execute().data[0] product = supabase.table(f'{mode}_products').select("metadata").eq('id', productId).execute().data[0]
if not product: raise HTTPException(404, f"Product {productId} not found") if not product: raise HTTPException(404, f"Product {productId} not found")
metadata = product['metadata'] metadata = product['metadata']
base_price = metadata.get('base_price', 100.0) base_price = metadata.get('base_price', 100.0)
# fetch pre-computed prices from registry # PRIORITY 1: session-aware price (computed by Airflow worker)
if sessionId:
session_price = registry.get_session_price(sessionId, productId)
if session_price is not None:
return PriceResponse(
productId=productId,
price=session_price,
base_price=base_price,
markup=session_price/base_price,
elasticity=None,
model_version='session-aware'
)
# PRIORITY 2: global pre-computed prices (surge pricing)
prices_df = registry.get_prices('latest') prices_df = registry.get_prices('latest')
elasticity_df = registry.get_elasticity('latest') if prices_df is not None:
product_price_row = prices_df[prices_df['productId'] == productId]
if prices_df is None: if not product_price_row.empty:
# fallback: no pre-computed prices available optimal_price = float(product_price_row['optimal_price'].iloc[0])
return PriceResponse( return PriceResponse(
productId=productId, productId=productId,
price=base_price, price=optimal_price,
base_price=base_price, base_price=base_price,
markup=1.0, markup=optimal_price/base_price,
elasticity=None elasticity=None,
) model_version='surge'
)
# lookup pre-computed price for this product
product_price_row = prices_df[prices_df['productId'] == productId]
if product_price_row.empty:
# product not in pre-computed prices, fallback to base
return PriceResponse(
productId=productId,
price=base_price,
base_price=base_price,
markup=1.0,
elasticity=None
)
optimal_price = float(product_price_row['optimal_price'].iloc[0]) # TODO: use optimal_price everywhere as aresult
# get elasticity if available
product_elasticity = None
if elasticity_df is not None:
product_elasticity_row = elasticity_df[elasticity_df['productId'] == productId]
if not product_elasticity_row.empty:
product_elasticity = float(product_elasticity_row['elasticity'].iloc[0])
# PRIORITY 3: fallback to base price
return PriceResponse( return PriceResponse(
productId=productId, productId=productId,
price=optimal_price, price=base_price,
base_price=base_price, base_price=base_price,
markup=optimal_price/base_price, markup=1.0,
elasticity=product_elasticity elasticity=None,
model_version='base'
) )
@app.get("/models") @app.get("/models")

View File

@@ -198,12 +198,16 @@ def dump_logs(
auto_offset_reset='earliest', auto_offset_reset='earliest',
enable_auto_commit=False, enable_auto_commit=False,
value_deserializer=lambda x: json.loads(x.decode('utf-8')), value_deserializer=lambda x: json.loads(x.decode('utf-8')),
consumer_timeout_ms=5000 consumer_timeout_ms=30000,
fetch_max_wait_ms=10000,
max_poll_records=1000
) )
events = [] events = []
for msg in consumer: for msg in consumer:
events.append(msg.value) events.append(msg.value)
if last_n and len(events) >= last_n * 2:
break
consumer.close() consumer.close()

View File

@@ -112,11 +112,14 @@ services:
depends_on: depends_on:
- postgres - postgres
environment: environment:
- AIRFLOW__CORE__EXECUTOR=SequentialExecutor - AIRFLOW__CORE__EXECUTOR=LocalExecutor
- AIRFLOW__DATABASE__SQL_ALCHEMY_CONN=postgresql+psycopg2://airflow:airflow@postgres/airflow - AIRFLOW__DATABASE__SQL_ALCHEMY_CONN=postgresql+psycopg2://airflow:airflow@postgres/airflow
- AIRFLOW__CORE__FERNET_KEY=${AIRFLOW_FERNET_KEY} - AIRFLOW__CORE__FERNET_KEY=${AIRFLOW_FERNET_KEY}
- AIRFLOW__CORE__LOAD_EXAMPLES=false - AIRFLOW__CORE__LOAD_EXAMPLES=false
- AIRFLOW__CORE__ENABLE_XCOM_PICKLING=true - AIRFLOW__CORE__ENABLE_XCOM_PICKLING=true
- AIRFLOW__CORE__PARALLELISM=16
- AIRFLOW__CORE__DAG_CONCURRENCY=8
- AIRFLOW__CORE__MAX_ACTIVE_RUNS_PER_DAG=4
- _AIRFLOW_DB_MIGRATE=true - _AIRFLOW_DB_MIGRATE=true
- _AIRFLOW_WWW_USER_CREATE=true - _AIRFLOW_WWW_USER_CREATE=true
- _AIRFLOW_WWW_USER_USERNAME=admin - _AIRFLOW_WWW_USER_USERNAME=admin
@@ -136,14 +139,20 @@ services:
- airflow-init - airflow-init
- redis - redis
environment: environment:
- AIRFLOW__CORE__EXECUTOR=SequentialExecutor - AIRFLOW__CORE__EXECUTOR=LocalExecutor
- AIRFLOW__DATABASE__SQL_ALCHEMY_CONN=postgresql+psycopg2://airflow:airflow@postgres/airflow - AIRFLOW__DATABASE__SQL_ALCHEMY_CONN=postgresql+psycopg2://airflow:airflow@postgres/airflow
- AIRFLOW__CORE__FERNET_KEY=${AIRFLOW_FERNET_KEY} - AIRFLOW__CORE__FERNET_KEY=${AIRFLOW_FERNET_KEY}
- AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION=true - AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION=true
- AIRFLOW__CORE__LOAD_EXAMPLES=false - AIRFLOW__CORE__LOAD_EXAMPLES=false
- AIRFLOW__CORE__ENABLE_XCOM_PICKLING=true - AIRFLOW__CORE__ENABLE_XCOM_PICKLING=true
- AIRFLOW__CORE__PARALLELISM=16
- AIRFLOW__CORE__DAG_CONCURRENCY=8
- AIRFLOW__CORE__MAX_ACTIVE_RUNS_PER_DAG=4
- AIRFLOW__SCHEDULER__MIN_FILE_PROCESS_INTERVAL=30
- AIRFLOW__SCHEDULER__DAG_DIR_LIST_INTERVAL=60
- AIRFLOW__WEBSERVER__EXPOSE_CONFIG=true - AIRFLOW__WEBSERVER__EXPOSE_CONFIG=true
- AIRFLOW__WEBSERVER__SECRET_KEY=${AIRFLOW_SECRET_KEY} - AIRFLOW__WEBSERVER__SECRET_KEY=${AIRFLOW_SECRET_KEY}
- AIRFLOW__API__AUTH_BACKENDS=airflow.api.auth.backend.basic_auth
- KAFKA_HOST=kafka - KAFKA_HOST=kafka
- KAFKA_PORT=29092 - KAFKA_PORT=29092
- BACKEND_URL=http://backend:5000 - BACKEND_URL=http://backend:5000
@@ -173,13 +182,20 @@ services:
redis: redis:
condition: service_started condition: service_started
environment: environment:
- AIRFLOW__CORE__EXECUTOR=SequentialExecutor - AIRFLOW__CORE__EXECUTOR=LocalExecutor
- AIRFLOW__DATABASE__SQL_ALCHEMY_CONN=postgresql+psycopg2://airflow:airflow@postgres/airflow - AIRFLOW__DATABASE__SQL_ALCHEMY_CONN=postgresql+psycopg2://airflow:airflow@postgres/airflow
- AIRFLOW__CORE__FERNET_KEY=${AIRFLOW_FERNET_KEY} - AIRFLOW__CORE__FERNET_KEY=${AIRFLOW_FERNET_KEY}
- AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION=true - AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION=true
- AIRFLOW__CORE__LOAD_EXAMPLES=false - AIRFLOW__CORE__LOAD_EXAMPLES=false
- AIRFLOW__CORE__ENABLE_XCOM_PICKLING=true - AIRFLOW__CORE__ENABLE_XCOM_PICKLING=true
- AIRFLOW__CORE__PARALLELISM=16
- AIRFLOW__CORE__DAG_CONCURRENCY=8
- AIRFLOW__CORE__MAX_ACTIVE_RUNS_PER_DAG=4
- AIRFLOW__SCHEDULER__MIN_FILE_PROCESS_INTERVAL=30
- AIRFLOW__SCHEDULER__DAG_DIR_LIST_INTERVAL=60
- AIRFLOW__SCHEDULER__PARSING_PROCESSES=2
- AIRFLOW__WEBSERVER__SECRET_KEY=${AIRFLOW_SECRET_KEY} - AIRFLOW__WEBSERVER__SECRET_KEY=${AIRFLOW_SECRET_KEY}
- AIRFLOW__API__AUTH_BACKENDS=airflow.api.auth.backend.basic_auth
- KAFKA_HOST=kafka - KAFKA_HOST=kafka
- KAFKA_PORT=29092 - KAFKA_PORT=29092
- BACKEND_URL=http://backend:5000 - BACKEND_URL=http://backend:5000

66
engine/engine.py Normal file
View File

@@ -0,0 +1,66 @@
from sys import platform
import numpy as np
from .lib.demand import generate_demand, estimate_demand
from .lib.behavior import sample_behavior
from logging import INFO, getLogger
logger = getLogger(__name__)
logger.setLevel(INFO)
class MarketEngine():
    """Market side of the simulation: a population of human and agent shoppers.

    Given a price vector, the market draws a demand condition, samples one
    browsing trajectory per shopper, and returns the demand estimated from
    those trajectories.
    """

    def __init__(self,
                 alpha = 0.5,
                 N = 100,
                 demand_distribution = (50, 10),
                 demand_sampling_function = np.random.normal):
        """
        Args:
            alpha: fraction of the N shoppers that are agents.
            N: total number of shoppers per market response.
            demand_distribution: positional parameters forwarded to the sampler.
            demand_sampling_function: callable used to draw product valuations.
        """
        self.Nagents = int(N*alpha)
        # Humans are the remainder. Truncating both shares independently
        # (int(N*alpha) and int(N*(1-alpha))) could silently drop a shopper,
        # e.g. N=10, alpha=0.25 gave 2 + 7 = 9.
        self.Nhumans = int(N) - self.Nagents
        self.demand = (demand_sampling_function, demand_distribution)

    def act(self, prices):
        """Return the demand estimate produced by the population for `prices`."""
        demand = generate_demand(prices, *self.demand)
        # Humans are sampled first, then agents, so the random stream order is stable.
        human_t = [sample_behavior(demand, human=True) for _ in range(self.Nhumans)]
        agent_t = [sample_behavior(demand, human=False) for _ in range(self.Nagents)]
        return estimate_demand(human_t + agent_t)

    def measure(self):
        """Placeholder; not implemented yet."""
        pass
class PricingEngine():
    """Stub platform policy that ignores demand and proposes random prices."""

    def __init__(self) -> None:
        pass

    def act(self, demand):
        """Return 10 prices drawn uniformly from [25, 100); `demand` is unused."""
        n_prices = 10
        return np.random.uniform(low=25, high=100, size=n_prices)
class Limbo():
    """Turn-based loop that alternates between the platform and the market.

    Each side's `act` receives the other side's previous output.
    """

    def __init__(self,
                 platform,
                 market
                 ) -> None:
        self.platform, self.market = platform, market
        self.output = None
        self.platform_turn = True  # the platform always moves first

    def step(self):
        """Let whichever side is due act on the last output, print it, and swap turns."""
        actor = self.platform if self.platform_turn else self.market
        self.output = actor.act(self.output)
        print(self.output)
        self.platform_turn = not self.platform_turn
if __name__ == "__main__":
    # Smoke run: ten alternating platform/market turns with default engines.
    limbo = Limbo(PricingEngine(), MarketEngine())
    for _turn in range(10):
        limbo.step()

3
engine/lib/__init__.py Normal file
View File

@@ -0,0 +1,3 @@
from .demand import generate_demand, estimate_demand
from .behavior import sample_behavior
from .render import DashboardRenderer, style_axis

47
engine/lib/behavior.py Normal file
View File

@@ -0,0 +1,47 @@
from sim.rl.behavior_loader.models import BehaviorModel, AgentBehaviorModel, aggregate_event_transitions
import pandas as pd
import numpy as np
from .demand import generate_demand
import os

# Root of the collected interaction data. The default is the original
# developer path; set PHANTOM_EXPERIMENTS_DIR to run on any other machine.
base_dir = os.environ.get(
    "PHANTOM_EXPERIMENTS_DIR",
    "/home/velocitatem/Documents/Projects/PHANTOM/experiments",
).rstrip("/")
human_dir, agent_dir = f"{base_dir}/collected_data/", f"{base_dir}/agents/collected_data/"
_cache = {} # lazy cache for models and base pivots
def _get_base_pivot(human: bool):
    """Return the base event-transition table for humans or agents, building it once.

    The table is a DataFrame made from `aggregate_event_transitions` over the
    model's MDP, with missing cells filled with 0.0, and is memoised in `_cache`.
    """
    key = 'human' if human else 'agent'
    if key in _cache:
        return _cache[key]
    if human:
        model = BehaviorModel(human_dir)
    else:
        model = AgentBehaviorModel(agent_dir)
    transitions = aggregate_event_transitions(model.build_MDP())
    pivot = pd.DataFrame(transitions).fillna(0.0)
    _cache[key] = pivot
    return pivot
def adjust_behavior_to_condition(condition, transition_matrix):
    """Expand an NxN event-transition table to (N*P)x(N*P) weighted by demand.

    Args:
        condition: length-P sequence of non-negative per-product demand weights.
        transition_matrix: DataFrame of base event-to-event transition weights.

    Returns:
        DataFrame whose rows/columns are labelled "<event>_product<p>". Each
        base cell becomes a PxP block scaled by the outer product of the
        normalised condition.
    """
    condition = np.asarray(condition, dtype=float)
    n_products = len(condition)
    total = np.sum(condition)
    if total > 0:
        cond_norm = condition / total
    elif n_products:
        # generate_demand can return all zeros (every price above its valuation).
        # Dividing by that sum produced an all-NaN matrix; use uniform weights instead.
        cond_norm = np.full(n_products, 1.0 / n_products)
    else:
        cond_norm = condition
    base_vals = transition_matrix.values
    base_cols, base_rows = transition_matrix.columns.tolist(), transition_matrix.index.tolist()
    # expand via kronecker-like tiling: each cell becomes a P*P block weighted by outer product of cond_norm
    expanded = np.kron(base_vals, np.outer(cond_norm, cond_norm))
    new_cols = [f"{c}_product{p}" for c in base_cols for p in range(n_products)]
    new_rows = [f"{r}_product{p}" for r in base_rows for p in range(n_products)]
    return pd.DataFrame(expanded, index=new_rows, columns=new_cols)
def sample_behavior(condition, human=True, max_len=40):
    """Sample one browsing trajectory from the demand-adjusted transition table.

    Args:
        condition: per-product demand weights passed to adjust_behavior_to_condition.
        human: use the human base table when True, the agent table otherwise.
        max_len: maximum number of events in the trajectory.

    Returns:
        List of state labels. It starts at a uniformly chosen row state and ends
        at the first state containing 'checkout' or after `max_len` events,
        whichever comes first.
    """
    base_pivot = _get_base_pivot(human)
    adjusted_transitions = adjust_behavior_to_condition(condition, base_pivot)
    trajectory = [np.random.choice(adjusted_transitions.index)]
    # The previous condition (`< max_len or 'checkout' in last`) was inverted:
    # it never stopped at checkout and ignored max_len while the last event was
    # a checkout, so trajectories could run unbounded.
    while len(trajectory) < max_len and 'checkout' not in trajectory[-1]:
        current = trajectory[-1]
        if current not in adjusted_transitions.index:
            # A state that only appears as a column has no outgoing row;
            # end the trajectory instead of raising KeyError.
            break
        probs = adjusted_transitions.loc[current].values
        total = np.sum(probs)
        # With no outgoing mass, p=None makes np.random.choice sample uniformly.
        sample = np.random.choice(adjusted_transitions.columns, p=probs/total if total > 0 else None)
        trajectory.append(sample)
    return trajectory
if __name__ == "__main__":
    # Print one sampled trajectory for a human shopper, then one for an agent.
    for is_human in (True, False):
        t = sample_behavior(generate_demand(np.array([10,20,30])), human=is_human)
        print(t)

45
engine/lib/demand.py Normal file
View File

@@ -0,0 +1,45 @@
import logging
import numpy as np
from logging import getLogger
logger = getLogger(__name__)


def generate_demand(prices, distribution_method = np.random.normal, distribution_params = (50.0, 10.0)):
    """Draw a per-product demand vector for the given prices.

    One valuation per product is sampled from `distribution_method`; demand is
    the non-negative surplus of valuation over price, rescaled to sum to 100
    whenever any surplus exists (otherwise the all-zero vector is returned).
    """
    n_items = len(prices)
    # assumption 1: every product has an intrinsic valuation drawn from the given distribution
    valuations = distribution_method(*distribution_params, size=n_items)
    # assumption 2: demand falls linearly with price and is floored at zero
    surplus = np.maximum(0, valuations - prices)
    total = np.sum(surplus)
    if total > 0:
        demand = surplus / total * 100
    else:
        demand = surplus  # nothing to normalise; avoids dividing by zero
    logger.info(f"Generated demand for prices {prices}: {demand} with valuations from distribution {distribution_params}")
    return demand
def estimate_demand(trajectories):
    """Estimate per-product demand as the percentage share of product views.

    Every event containing 'view_product' counts as one view of the product
    whose index follows 'product' in the last '_'-separated token. Returns a
    dict mapping product index to its share of all views, in percent.
    """
    view_counts = {}
    for trajectory in trajectories:
        for event in trajectory:
            if 'view_product' not in event:
                continue
            suffix = event.split('_')[-1]
            product_id = int(suffix.replace('product', ''))
            view_counts[product_id] = view_counts.get(product_id, 0) + 1
    total_views = sum(view_counts.values())
    # normalize to percentage
    return {pid: (count / total_views) * 100 for pid, count in view_counts.items()}
# Example usage: compare generated demand with the demand recovered from sampled behavior
if __name__ == "__main__":
    np.random.seed(42)
    prices = np.array([20.0, 35.0, 50.0, 65.0])
    demand = generate_demand(prices)
    print("Generated Demand:", demand)
    from .behavior import sample_behavior
    N, alphat = 200, 0.1
    n_humans, n_agents = int(N*(1 - alphat)), int(N*alphat)
    # humans are sampled before agents
    trajectories = [sample_behavior(demand, human=True) for _ in range(n_humans)]
    trajectories += [sample_behavior(demand, human=False) for _ in range(n_agents)]
    demand_estimate = estimate_demand(trajectories)
    print("Estimated Demand from Behavior:", demand_estimate)
    gaps = [np.abs(demand_estimate.get(k, 0) - demand[k]) for k in range(len(prices))]
    delta = np.mean(gaps)
    print("Demand Delta:", delta)

126
engine/lib/render.py Normal file
View File

@@ -0,0 +1,126 @@
"""rendering logic for PHANTOM environment dashboard"""
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
def style_axis(ax, title: str = None, xlabel: str = None, ylabel: str = None):
    """Hide the top/right spines of `ax` and set whichever labels are given."""
    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)
    if title:
        ax.set_title(title, fontsize=11, fontweight='bold', pad=8)
    if xlabel:
        ax.set_xlabel(xlabel, fontsize=9)
    if ylabel:
        ax.set_ylabel(ylabel, fontsize=9)
class DashboardRenderer:
    """stateful renderer for PHANTOM market dynamics visualization

    Holds one matplotlib figure and a 3x3 GridSpec. Every `render` call clears
    the figure and redraws all panels from the history stored on the env.
    """

    def __init__(self):
        # Both are created lazily on the first render() call.
        self.fig = None
        self.gs = None

    def render(self, env) -> None:
        """Redraw the whole dashboard from `env`.

        Reads env._step_count, env.alpha, env._demand_history,
        env._price_history, env._revenue_history, env.n_products, env.market
        and calls env._compute_elasticity().
        """
        if self.fig is None:
            # First call: switch to interactive mode and open the window without blocking.
            plt.ion()
            self.fig = plt.figure(figsize=(14, 10))
            self.gs = GridSpec(3, 3, figure=self.fig, hspace=0.35, wspace=0.3,
                               left=0.07, right=0.95, top=0.92, bottom=0.08)
            plt.show(block=False)
        self.fig.clear()
        self.fig.suptitle(f'PHANTOM Market Dynamics [t={env._step_count}, a={env.alpha:.2f}]',
                          fontsize=14, fontweight='bold')
        # Transposed so rows are products and columns are steps.
        demand_mat = np.array(env._demand_history).T
        price_mat = np.array(env._price_history).T
        elasticity = env._compute_elasticity()
        self._render_scatter(env)
        self._render_elasticity_bar(env, elasticity)
        self._render_session_pie(env)
        self._render_price_heatmap(price_mat)
        self._render_demand_heatmap(demand_mat)
        self._render_correlation(env.n_products, price_mat, demand_mat)
        self._render_revenue(env)
        self.fig.canvas.draw_idle()
        self.fig.canvas.flush_events()

    def _render_scatter(self, env):
        """Top-left panel: all (price, demand) points coloured by product, plus a linear fit."""
        ax = self.fig.add_subplot(self.gs[0, 0])
        prices_flat = np.array(env._price_history).flatten()
        demands_flat = np.array(env._demand_history).flatten()
        # Product index for each flattened point (history rows are one step each).
        product_ids = np.tile(np.arange(env.n_products), len(env._price_history))
        ax.scatter(prices_flat, demands_flat, c=product_ids, cmap='plasma', alpha=0.6, s=15, edgecolors='none')
        if len(prices_flat) > 1:
            # Degree-1 least-squares fit drawn across the observed price range.
            z = np.polyfit(prices_flat, demands_flat, 1)
            p_line = np.linspace(prices_flat.min(), prices_flat.max(), 50)
            ax.plot(p_line, np.polyval(z, p_line), '--', lw=1.5, alpha=0.8)
        style_axis(ax, "Price-Demand Relationship", "Price ($)", "Demand")

    def _render_elasticity_bar(self, env, elasticity):
        """Top-middle panel: horizontal bar per product with reference lines at 0 and -1."""
        ax = self.fig.add_subplot(self.gs[0, 1])
        ax.barh(range(env.n_products), elasticity, alpha=0.8)
        ax.axvline(0, lw=0.8, alpha=0.5)
        ax.axvline(-1, lw=1, ls='--', alpha=0.5)
        ax.set_yticks(range(env.n_products))
        ax.set_yticklabels([f'P{i}' for i in range(env.n_products)], fontsize=7)
        style_axis(ax, "Price Elasticity", "(dQ/dP)(P/Q)", None)

    def _render_session_pie(self, env):
        """Top-right panel: pie of env.market.Nhumans versus env.market.Nagents."""
        ax = self.fig.add_subplot(self.gs[0, 2])
        n_h, n_a = env.market.Nhumans, env.market.Nagents
        wedges, _ = ax.pie([n_h, n_a], startangle=90, wedgeprops={'linewidth': 2, 'edgecolor': 'white'})
        ax.legend(wedges, [f'H ({n_h})', f'A ({n_a})'], loc='lower center', fontsize=8,
                  frameon=False, bbox_to_anchor=(0.5, -0.05))
        ax.set_title("Session Mix", fontsize=11, fontweight='bold')

    def _render_price_heatmap(self, price_mat):
        """Middle-left panel (two columns wide): heatmap of price_mat with a colorbar."""
        ax = self.fig.add_subplot(self.gs[1, :2])
        im = ax.imshow(price_mat, aspect='auto', cmap='viridis', origin='lower')
        style_axis(ax, "Price Heatmap P(product, t)", "Step", "Product")
        cbar = self.fig.colorbar(im, ax=ax, fraction=0.03, pad=0.02)
        cbar.set_label('$', fontsize=8)

    def _render_demand_heatmap(self, demand_mat):
        """Middle-right panel: heatmap of demand_mat with a colorbar."""
        ax = self.fig.add_subplot(self.gs[1, 2])
        im = ax.imshow(demand_mat, aspect='auto', cmap='Blues', origin='lower')
        style_axis(ax, "Demand Q(product, t)", "Step", None)
        self.fig.colorbar(im, ax=ax, fraction=0.046, pad=0.02)

    def _render_correlation(self, n_products, price_mat, demand_mat):
        """Bottom-left panel: price-versus-demand correlation block, drawn once there are more than 2 steps."""
        ax = self.fig.add_subplot(self.gs[2, 0])
        if price_mat.shape[1] > 2:
            # corrcoef stacks both matrices; keep only the price-rows x demand-columns block.
            corr = np.corrcoef(price_mat, demand_mat)[:n_products, n_products:]
            im = ax.imshow(corr, cmap='RdBu', vmin=-1, vmax=1, aspect='auto')
            ax.set_xticks(range(n_products))
            ax.set_yticks(range(n_products))
            ax.set_xticklabels([f'Q{i}' for i in range(n_products)], fontsize=6)
            ax.set_yticklabels([f'P{i}' for i in range(n_products)], fontsize=6)
            self.fig.colorbar(im, ax=ax, fraction=0.046, pad=0.02)
        style_axis(ax, "Price-Demand Correlation", None, None)

    def _render_revenue(self, env):
        """Bottom-right panel: revenue per step, with the per-step std of demand on a twin axis."""
        ax = self.fig.add_subplot(self.gs[2, 1:])
        n_steps = len(env._revenue_history)
        demand_std = [np.std(d) for d in env._demand_history]
        ax.fill_between(range(n_steps), env._revenue_history, alpha=0.3)
        ax.plot(env._revenue_history, linewidth=2, label='Revenue')
        ax.set_xlim(0, max(n_steps, 1))
        ax.set_ylim(0, max(env._revenue_history) * 1.1 if env._revenue_history else 1)
        ax2 = ax.twinx()
        ax2.plot(range(n_steps), demand_std, linewidth=2, ls='-', alpha=0.9, label='sigma(Demand)')
        # NOTE(review): min()/max() raise on an empty history; this assumes
        # render() is only called after the env has recorded at least one step.
        d_min, d_max = min(demand_std), max(demand_std)
        # 20% padding around the observed range, or a fixed 0.5 when the range is flat.
        margin = (d_max - d_min) * 0.2 if d_max > d_min else 0.5
        ax2.set_ylim(max(0, d_min - margin), d_max + margin)
        ax2.set_ylabel('Demand sigma', fontsize=9)
        style_axis(ax, "Revenue & Demand Dispersion", "Step", "Revenue ($)")
        ax.legend(loc='upper left', fontsize=7, frameon=False)
        ax2.legend(loc='upper right', fontsize=7, frameon=False)

    def close(self):
        """Close the figure if one was created and forget it."""
        if self.fig:
            plt.close(self.fig)
            self.fig = None

34
engine/studies/factors.py Normal file
View File

@@ -0,0 +1,34 @@
"""shared factor definitions for experimental designs"""
import numpy as np
from dataclasses import dataclass, field
from typing import Callable, Any
@dataclass
class Factor:
    """One experimental factor: a name and the list of levels it can take."""
    name: str
    levels: list
    # True: crossed in full by the mixed design; False: sampled via Latin hypercube.
    primary: bool = True # full cross vs sampled
# demand samplers sharing the (mu, sigma, size) signature
def demand_linear(mu, sigma, size):
    """Normal(mu, sigma) draws with negative values floored at 0."""
    draws = np.random.normal(mu, sigma, size)
    return np.maximum(0, draws)


def demand_uniform(mu, sigma, size):
    """Uniform draws on [mu - sigma, mu + sigma)."""
    low, high = mu - sigma, mu + sigma
    return np.random.uniform(low, high, size)


def demand_exponential(mu, sigma, size):
    """Exponential draws with scale mu; sigma is accepted but not used."""
    return np.random.exponential(mu, size)


def demand_logistic(mu, sigma, size):
    """Logistic draws with location mu and scale sigma."""
    return np.random.logistic(mu, sigma, size)
# Lookup from the "demand_fn" factor level name to the sampler implementing it.
DEMAND_FUNCTIONS = {
    "linear": demand_linear,
    "uniform": demand_uniform,
    "exponential": demand_exponential,
    "logistic": demand_logistic,
}
# Design factors shared by the study scripts. primary=True factors are fully
# crossed; primary=False factors are the ones a mixed design samples instead.
FACTORS = [
    Factor("demand_fn", list(DEMAND_FUNCTIONS.keys()), primary=True),
    Factor("alpha", [0.1, 0.3, 0.5, 0.7], primary=True),
    Factor("n_products", [5, 15, 30, 50], primary=True),
    Factor("demand_mu", [30.0, 50.0, 70.0], primary=False),
    Factor("demand_sigma", [5.0, 10.0, 20.0], primary=False),
    Factor("N", [100, 500, 1000], primary=False),
]
# Number of seeds (0..SEEDS_PER_CONFIG-1) replicated for every factor combination.
SEEDS_PER_CONFIG = 5

View File

@@ -0,0 +1,89 @@
"""full factorial design - all factor combinations"""
import sys
sys.path.insert(0, "..")
import logging
from itertools import product
import json
import hashlib
from pathlib import Path
from concurrent.futures import ProcessPoolExecutor
from .factors import FACTORS, DEMAND_FUNCTIONS, SEEDS_PER_CONFIG
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
log = logging.getLogger(__name__)
def generate_configs():
    """Build one config dict per (factor combination, seed).

    Each config maps factor names to levels, plus "seed" and an 8-hex-char
    "id" derived from the md5 of its sorted-key JSON dump.
    """
    names = [f.name for f in FACTORS]
    level_grid = product(*(f.levels for f in FACTORS))
    configs = []
    for combo in level_grid:
        base = dict(zip(names, combo))
        for seed in range(SEEDS_PER_CONFIG):
            cfg = dict(base, seed=seed)
            digest = hashlib.md5(json.dumps(cfg, sort_keys=True).encode()).hexdigest()
            cfg["id"] = digest[:8]
            configs.append(cfg)
    return configs
def run_single(cfg: dict) -> dict:
    """execute one experiment config, return metrics

    Runs a random-action policy for up to 100 steps on a PHANTOM env built from
    `cfg` and returns its id, config, total/average reward and step count.

    Args:
        cfg: dict with keys "seed", "demand_fn", "n_products", "alpha", "N",
            "demand_mu", "demand_sigma" and "id".
    """
    # Imported here so each worker process resolves the env on its own.
    from engine.wrapper import PHANTOM
    import numpy as np
    np.random.seed(cfg["seed"])
    demand_fn = DEMAND_FUNCTIONS[cfg["demand_fn"]]
    env = PHANTOM(
        n_products=cfg["n_products"],
        alpha=cfg["alpha"],
        N=cfg["N"],
    )
    total_reward, steps = 0.0, 0
    try:
        env.market.demand = (demand_fn, (cfg["demand_mu"], cfg["demand_sigma"]))
        # The action space owns its own RNG; without seeding it the sampled
        # actions differ between runs even though np.random is seeded above.
        env.action_space.seed(cfg["seed"])
        obs, _ = env.reset()
        for _ in range(100):
            action = env.action_space.sample()
            obs, reward, term, trunc, _ = env.step(action)
            total_reward += reward
            steps += 1
            # An episode is over on either signal; stepping past truncation is undefined.
            if term or trunc:
                break
    finally:
        # Release the env (and any renderer) even when a step raises.
        env.close()
    return {
        "id": cfg["id"],
        "config": cfg,
        "total_reward": total_reward,
        "avg_reward": total_reward / steps if steps > 0 else 0.0,
        "steps": steps,
    }
def run_study(max_workers: int = None, output: str = "results_full.jsonl"):
    """Run every full-factorial config in a process pool and write the results as JSON lines.

    Returns the list of result dicts in config order.
    """
    configs = generate_configs()
    log.info(f"full factorial: {len(configs)} configs ({len(configs)//SEEDS_PER_CONFIG} unique × {SEEDS_PER_CONFIG} seeds)")
    results = []
    with ProcessPoolExecutor(max_workers=max_workers) as ex:
        for done, result in enumerate(ex.map(run_single, configs), start=1):
            results.append(result)
            if done % 100 == 0:
                log.info(f"progress: {done}/{len(configs)}")
    lines = [json.dumps(r) for r in results]
    Path(output).write_text("\n".join(lines))
    log.info(f"wrote {len(results)} results to {output}")
    return results
if __name__ == "__main__":
    # CLI entry point: report the design size, then run unless --dry-run is given.
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--workers", type=int, default=None)
    parser.add_argument("--output", default="results_full.jsonl")
    parser.add_argument("--dry-run", action="store_true", help="only show design size")
    args = parser.parse_args()
    configs = generate_configs()
    log.info(f"design: {len(configs)} runs | factors: {[f.name for f in FACTORS]} | levels: {[len(f.levels) for f in FACTORS]}")
    if args.dry_run:
        pass
    else:
        run_study(args.workers, args.output)

106
engine/studies/mixed_lh.py Normal file
View File

@@ -0,0 +1,106 @@
"""mixed design: full factorial on primary factors, latin hypercube on secondary"""
import sys
sys.path.insert(0, "..")
import logging
from itertools import product
import json
import hashlib
from pathlib import Path
from concurrent.futures import ProcessPoolExecutor
import numpy as np
from scipy.stats.qmc import LatinHypercube
from factors import FACTORS, DEMAND_FUNCTIONS, SEEDS_PER_CONFIG
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
log = logging.getLogger(__name__)
LH_SAMPLES = 10


def generate_configs(lh_samples: int = LH_SAMPLES):
    """Build configs: full cross of primary factors, Latin-hypercube draws for secondary ones.

    For every primary combination, `lh_samples` points are drawn from one shared
    LatinHypercube(seed=42) sampler; each coordinate u in [0, 1) selects
    levels[int(u * len(levels))] of the matching secondary factor. Every
    resulting base config is replicated over SEEDS_PER_CONFIG seeds and given
    an 8-hex-char md5 "id".
    """
    primary = [f for f in FACTORS if f.primary]
    secondary = [f for f in FACTORS if not f.primary]
    lhs = LatinHypercube(d=len(secondary), seed=42)
    configs = []
    for p_combo in product(*[f.levels for f in primary]):
        primary_vals = {factor.name: level for factor, level in zip(primary, p_combo)}
        for point in lhs.random(n=lh_samples):
            base = dict(primary_vals)
            for factor, u in zip(secondary, point):
                base[factor.name] = factor.levels[int(u * len(factor.levels))]
            for seed in range(SEEDS_PER_CONFIG):
                cfg = dict(base, seed=seed)
                digest = hashlib.md5(json.dumps(cfg, sort_keys=True).encode()).hexdigest()
                cfg["id"] = digest[:8]
                configs.append(cfg)
    return configs
def run_single(cfg: dict) -> dict:
    """Execute one experiment config with a random-action policy and return its metrics.

    Args:
        cfg: dict with keys "seed", "demand_fn", "n_products", "alpha", "N",
            "demand_mu", "demand_sigma" and "id".

    Returns:
        dict with "id", "config", "total_reward", "avg_reward" and "steps".
    """
    # Imported here so each worker process resolves the env on its own.
    from engine.wrapper import PHANTOM
    import numpy as np
    np.random.seed(cfg["seed"])
    demand_fn = DEMAND_FUNCTIONS[cfg["demand_fn"]]
    env = PHANTOM(
        n_products=cfg["n_products"],
        alpha=cfg["alpha"],
        N=cfg["N"],
    )
    total_reward, steps = 0.0, 0
    try:
        env.market.demand = (demand_fn, (cfg["demand_mu"], cfg["demand_sigma"]))
        # The action space owns its own RNG; without seeding it the sampled
        # actions differ between runs even though np.random is seeded above.
        env.action_space.seed(cfg["seed"])
        obs, _ = env.reset()
        for _ in range(100):
            action = env.action_space.sample()
            obs, reward, term, trunc, _ = env.step(action)
            total_reward += reward
            steps += 1
            # An episode is over on either signal; stepping past truncation is undefined.
            if term or trunc:
                break
    finally:
        # Release the env (and any renderer) even when a step raises.
        env.close()
    return {
        "id": cfg["id"],
        "config": cfg,
        "total_reward": total_reward,
        # Guarded like the full-factorial runner so both scripts agree.
        "avg_reward": total_reward / steps if steps > 0 else 0.0,
        "steps": steps,
    }
def run_study(max_workers: int = None, output: str = "results_mixed.jsonl", lh_samples: int = LH_SAMPLES):
    """Run every mixed-design config in a process pool and write the results as JSON lines.

    Returns the list of result dicts in config order.
    """
    configs = generate_configs(lh_samples)
    primary_sizes = [len(f.levels) for f in FACTORS if f.primary]
    n_primary_cells = int(np.prod(primary_sizes))
    log.info(f"mixed LH: {len(configs)} configs ({n_primary_cells} primary × {lh_samples} LH × {SEEDS_PER_CONFIG} seeds)")
    results = []
    with ProcessPoolExecutor(max_workers=max_workers) as ex:
        for done, result in enumerate(ex.map(run_single, configs), start=1):
            results.append(result)
            if done % 100 == 0:
                log.info(f"progress: {done}/{len(configs)}")
    lines = [json.dumps(r) for r in results]
    Path(output).write_text("\n".join(lines))
    log.info(f"wrote {len(results)} results to {output}")
    return results
if __name__ == "__main__":
    # CLI entry point: report the design size, then run unless --dry-run is given.
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--workers", type=int, default=None)
    parser.add_argument("--output", default="results_mixed.jsonl")
    parser.add_argument("--lh-samples", type=int, default=10)
    parser.add_argument("--dry-run", action="store_true", help="only show design size")
    args = parser.parse_args()
    primary = [f for f in FACTORS if f.primary]
    secondary = [f for f in FACTORS if not f.primary]
    configs = generate_configs(args.lh_samples)
    log.info(f"design: {len(configs)} runs | primary: {[f.name for f in primary]} | secondary (LH): {[f.name for f in secondary]}")
    if args.dry_run:
        pass
    else:
        run_study(args.workers, args.output, args.lh_samples)

45
engine/train.py Normal file
View File

@@ -0,0 +1,45 @@
from stable_baselines3 import SAC
from stable_baselines3.common.callbacks import EvalCallback, BaseCallback
from .wrapper import PHANTOM
class RenderCallback(BaseCallback):
    """Renders environment on every step for live visualization."""

    def __init__(self, env: PHANTOM):
        super().__init__()
        # The env whose render() is called from _on_step.
        self.env = env

    def _on_step(self) -> bool:
        self.env.render()
        # Always True, so this callback never asks for training to stop.
        return True
# Training env renders live; the evaluation env runs without rendering.
env = PHANTOM(n_products=10, alpha=0.3, render_mode="human")
eval_env = PHANTOM(n_products=10, alpha=0.3, render_mode=None)
model = SAC(
    "MultiInputPolicy",
    env,
    verbose=1,
    learning_rate=3e-4,
    buffer_size=50000,
    batch_size=256,
    tau=0.005,
    gamma=0.99,
)
render_cb = RenderCallback(env)
eval_cb = EvalCallback(eval_env, eval_freq=1000, n_eval_episodes=5, verbose=1)
model.learn(total_timesteps=50000, callback=[render_cb, eval_cb])
model.save("phantom_sac")
# Training is done: release both envs before `env` is rebound below,
# otherwise the training env's renderer is never closed.
env.close()
eval_env.close()

# test trained policy
env = PHANTOM(n_products=10, alpha=0.3, render_mode="human")
obs, _ = env.reset()
for _ in range(100):
    action, _ = model.predict(obs, deterministic=True)
    obs, reward, term, trunc, _ = env.step(action)
    env.render()
    if term or trunc: break
env.close()

118
engine/wrapper.py Normal file
View File

@@ -0,0 +1,118 @@
import gymnasium as gym
from gymnasium import spaces
import numpy as np
from .engine import Limbo, MarketEngine, PricingEngine
from .lib.render import DashboardRenderer
class PHANTOM(gym.Env):
"""Gymnasium wrapper for the Limbo pricing-market simulation. Platform sets prices, market responds with demand."""
metadata = {"render_modes": ["human", "ansi"]}
def __init__(self,
n_products: int = 10,
alpha: float = 0.3,
N: int = 100,
price_bounds: tuple = (10.0, 150.0),
lambda_coi: float = 0.1,
render_mode: str = None):
super().__init__()
self.n_products = n_products
self.price_bounds = price_bounds
self.lambda_coi = lambda_coi
self.render_mode = render_mode
self.alpha = alpha
self.N = N
self.market = MarketEngine(alpha=alpha, N=N)
self._platform_stub = PricingEngine()
self._limbo = Limbo(self._platform_stub, self.market)
self.action_space = spaces.Box(
low=price_bounds[0], high=price_bounds[1],
shape=(n_products,), dtype=np.float32
)
self.observation_space = spaces.Dict({
"demand": spaces.Box(low=0.0, high=100.0, shape=(n_products,), dtype=np.float32),
"prices": spaces.Box(low=price_bounds[0], high=price_bounds[1], shape=(n_products,), dtype=np.float32),
})
self._prices = None
self._demand = None
self._step_count = 0
self._demand_history = []
self._price_history = []
self._revenue_history = []
self._renderer = None
def _get_obs(self) -> dict:
demand_arr = np.array([self._demand.get(i, 0.0) for i in range(self.n_products)], dtype=np.float32)
return {"demand": demand_arr, "prices": self._prices.astype(np.float32)}
def _compute_reward(self, prices: np.ndarray, demand: dict) -> float:
revenue = np.sum(prices * np.array([demand.get(i, 0.0) for i in range(self.n_products)]))
# TODO: implement supra-competitive price punishment
return float(revenue)
def _record_history(self):
demand_arr = np.array([self._demand.get(i, 0.0) for i in range(self.n_products)])
self._demand_history.append(demand_arr)
self._price_history.append(self._prices.copy())
self._revenue_history.append(np.sum(self._prices * demand_arr))
def reset(self, seed=None, options=None):
super().reset(seed=seed)
self._prices = np.random.uniform(*self.price_bounds, size=self.n_products)
self._demand = self.market.act(self._prices)
self._step_count = 0
self._demand_history, self._price_history, self._revenue_history = [], [], []
self._record_history()
return self._get_obs(), {}
def step(self, action: np.ndarray):
self._prices = np.clip(action, *self.price_bounds)
self._demand = self.market.act(self._prices)
self._step_count += 1
self._record_history()
reward = self._compute_reward(self._prices, self._demand)
terminated = self._step_count >= 100
return self._get_obs(), reward, terminated, False, {"step": self._step_count}
def _compute_elasticity(self) -> np.ndarray:
"""point elasticity: e = (dQ/dP) * (P/Q) via finite differences, clipped to [-5, 5]"""
if len(self._price_history) < 2:
return np.zeros(self.n_products)
p, q = np.array(self._price_history), np.array(self._demand_history)
dp, dq = np.diff(p, axis=0), np.diff(q, axis=0)
valid = np.abs(dp) > 0.5
with np.errstate(divide='ignore', invalid='ignore'):
elasticity = np.where(valid, (dq / dp) * (p[:-1] / np.maximum(q[:-1], 1.0)), 0.0)
elasticity = np.nan_to_num(np.clip(elasticity, -5.0, 5.0), nan=0.0)
return np.mean(elasticity, axis=0) if len(elasticity) > 0 else np.zeros(self.n_products)
def render(self):
    """Render according to ``render_mode``.

    ``"ansi"`` returns a one-line text summary; ``"human"`` lazily creates the
    dashboard renderer, draws this environment with it and returns None; any
    other mode returns None.
    """
    mode = self.render_mode
    if mode == "ansi":
        return f"step={self._step_count}, prices={self._prices}, demand={self._demand}"
    if mode == "human":
        if self._renderer is None:
            self._renderer = DashboardRenderer()
        self._renderer.render(self)
    return None
def close(self):
    """Close the dashboard renderer, if one was created, and forget it."""
    renderer = self._renderer
    if not renderer:
        return
    renderer.close()
    self._renderer = None
if __name__ == "__main__":
    # Manual smoke run: random pricing policy with the dashboard renderer.
    env = PHANTOM(n_products=15, alpha=0.3, N=100, render_mode="human")
    try:
        obs, _ = env.reset()
        for step in range(100):
            action = env.action_space.sample()
            obs, reward, term, trunc, info = env.step(action)
            env.render()
            if term:
                break
    finally:
        # Release the renderer even when a step/render call raises or the
        # run is interrupted from the keyboard.
        env.close()

117
experiments/agents/run.py Normal file
View File

@@ -0,0 +1,117 @@
from supabase import create_client, Client
import os
import random
import asyncio
import json
from dotenv import load_dotenv
from experiments.agents.agent import get_agent, AgentTypes
from lib.kafka_client import get_interactions
load_dotenv()

# Directory prefix (always ending in a path separator) under which each
# experiment gets its own sub-directory. It can be overridden with the
# PHANTOM_AGENT_DATA_DIR environment variable so the script is not tied to a
# single developer machine; the default keeps the original location.
RESULTS = os.path.join(
    os.getenv("PHANTOM_AGENT_DATA_DIR")
    or "/home/velocitatem/Documents/Projects/PHANTOM/experiments/agents/collected_data/",
    "",
)

# Supabase client shared by the helpers below; credentials come from the
# environment (populated from .env by load_dotenv above).
client = create_client(
    os.getenv("NEXT_PUBLIC_SUPABASE_URL"),
    os.getenv("NEXT_PUBLIC_SUPABASE_ANON_KEY")
)
def pick_random_task(mode='hotel'):
    """Pick one task at random from the ``tasks`` table.

    Args:
        mode: market mode. For ``'hotel'`` (the default, matching the previous
            hard-coded behaviour) tasks whose description mentions "flight"
            are excluded; any other value applies no filtering.

    Returns:
        A task row (dict), or None when no task is available.
    """
    # ``data`` may be empty or None when the query returns nothing.
    tasks = client.table("tasks").select("*").execute().data or []
    if mode == 'hotel':
        # drop all that have 'flight' in the description; a missing (None)
        # description cannot mention flights, so such tasks are kept
        tasks = [task for task in tasks if 'flight' not in (task['task_description'] or '').lower()]
    return random.choice(tasks) if tasks else None
def clear_kafka_data():
    """Delete and recreate Kafka topics to clear all data.

    The broker address is read from KAFKA_HOST / KAFKA_PORT (defaulting to
    localhost:9092). Failures while deleting or creating are printed rather
    than raised; the admin client is closed after the create attempt.
    """
    import time
    from kafka.admin import KafkaAdminClient, NewTopic
    from kafka.errors import UnknownTopicOrPartitionError

    host = os.getenv('KAFKA_HOST', 'localhost')
    port = os.getenv('KAFKA_PORT', '9092')
    admin = KafkaAdminClient(bootstrap_servers=f'{host}:{port}')
    topics = ['user-interactions', 'price-logs']

    try:
        admin.delete_topics(topics, timeout_ms=5000)
        print(f"Deleted topics: {topics}")
        time.sleep(2)  # pause after a successful delete before recreating
    except UnknownTopicOrPartitionError:
        print("Topics don't exist, skipping delete")
    except Exception as e:
        print(f"Error deleting topics: {e}")

    replacements = [
        NewTopic(name=topic_name, num_partitions=3, replication_factor=1)
        for topic_name in topics
    ]
    try:
        admin.create_topics(new_topics=replacements, validate_only=False)
        print(f"Recreated topics: {topics}")
    except Exception as e:
        print(f"Error creating topics: {e}")
    finally:
        admin.close()
def create_new_experiment(task_id):
    """Insert an agent experiment row for ``task_id`` and return it.

    The subject name is ``agent_`` followed by the first 8 characters of a
    random UUID. Returns the inserted row, or None when the insert response
    carries no data.
    """
    import uuid

    short_id = str(uuid.uuid4())[:8]
    payload = {
        "subject_name": f"agent_{short_id}",
        "xp_human_only": False,
        "xp_market_mode": "hotel",
        "xp_task_id": task_id,
    }
    rows = client.table("experiments").insert(payload).execute().data
    if rows:
        return rows[0]
    return None
if __name__ == "__main__":
    # End-to-end run: reset Kafka, create an experiment for a random task,
    # let a browser agent work on it, then export what Kafka captured.
    clear_kafka_data()
    task = pick_random_task()
    if not task:
        print("No tasks available")
        # SystemExit instead of exit(): the latter is only injected by the
        # ``site`` module and is not guaranteed to exist.
        raise SystemExit(1)
    experiment = create_new_experiment(task['id'])
    if not experiment:
        # create_new_experiment returns None when the insert yields no row;
        # fail with a clear message instead of a TypeError on subscripting.
        print(f"Failed to create experiment for task {task['id']}")
        raise SystemExit(1)
    exp_id = experiment['id']
    exp_dir = f"{RESULTS}{exp_id}"
    os.makedirs(exp_dir, exist_ok=True)
    # construct experiment URL with uuid param
    base_url = os.getenv('NEXT_PUBLIC_API_BASE', 'http://localhost:3000')
    agent_url = f"{base_url}/start-task?uuid={exp_id}"
    print(f"Created experiment {exp_id} for task {task['id']}")
    print(f"Agent will interact with: {agent_url}")
    # instantiate and run agent
    agent = get_agent(
        AgentTypes.GENERIC_BROWSER_USE_AGENT,
        goal=task['task_description'],
        url=agent_url,
        timeout=300,
        headless=True
    )
    result = asyncio.run(agent.act())
    print(f"Agent result: {result}")
    # export interaction and price data from kafka
    interactions = get_interactions(topic='user-interactions', timeout_ms=3000)
    prices = get_interactions(topic='price-logs', timeout_ms=3000)
    with open(os.path.join(exp_dir, "int.json"), 'w') as f:
        json.dump(interactions, f, indent=2)
    with open(os.path.join(exp_dir, "price.json"), 'w') as f:
        json.dump(prices, f, indent=2)
    print(f"Experiment {exp_id} completed.")
    print(f"Exported {len(interactions)} interactions and {len(prices)} price logs to {exp_dir}")

View File

@@ -1,3 +1,4 @@
from pandas.core.algorithms import factorize_array
from airflow import DAG from airflow import DAG
from airflow.operators.python import PythonOperator from airflow.operators.python import PythonOperator
from airflow.utils.dates import days_ago from airflow.utils.dates import days_ago
@@ -208,3 +209,12 @@ def create_surge_pricing_dag(store_mode: str) -> DAG:
# instantiate DAGs for Airflow to discover # instantiate DAGs for Airflow to discover
dag_airline = create_surge_pricing_dag('airline') dag_airline = create_surge_pricing_dag('airline')
dag_hotel = create_surge_pricing_dag('hotel') dag_hotel = create_surge_pricing_dag('hotel')
# TODO: Refactor this factory from a surge pricing factory to a general pricing factory
# We will do this by passing a pricing strategy class to the factory, since the generic pipeline is:
# take all interaction data, group by sessionId and assign a new price vector to each session
# in the grouping we get a subset of the interactions per sessionId and we can map that to some Features
# we define a custom _get_features(interactions .) method in the strategy class
# we then run only the inference which is the .predict(trajectory) per-session which will give us a new price vector
# this we then publish for each sessionId group
# this might include no deleting most of the pricers we have defined and starting with a super simple surge-pricing algorithm that is no-fit only predict. This we can then test end-to-end and observe changes to prices according to a desired strategy - we have to define this one as a very short term strategy because we run sessions that take only a few minutes.

View File

@@ -120,15 +120,31 @@ def apply_surge_pricing(**kwargs):
# rename demand_score to demand for pricer compatibility # rename demand_score to demand for pricer compatibility
data = product_features.rename(columns={'demand_score': 'demand'}) data = product_features.rename(columns={'demand_score': 'demand'})
high_thresh = dag_conf.get('high_threshold', 10)
low_thresh = dag_conf.get('low_threshold', 2)
surge_mult = dag_conf.get('surge_multiplier', 1.2)
discount_mult = dag_conf.get('discount_multiplier', 0.9)
logging.info(f"Surge pricing config: high_thresh={high_thresh}, low_thresh={low_thresh}, surge_mult={surge_mult}, discount_mult={discount_mult}")
logging.info(f"Demand stats: min={data['demand'].min():.2f}, max={data['demand'].max():.2f}, mean={data['demand'].mean():.2f}")
logging.info(f"Products with high demand (>={high_thresh}): {(data['demand'] >= high_thresh).sum()}")
logging.info(f"Products with low demand (<={low_thresh}): {(data['demand'] <= low_thresh).sum()}")
surge_pricer = SimpleSurgePricer( surge_pricer = SimpleSurgePricer(
high_threshold=dag_conf.get('high_threshold', 10), high_threshold=high_thresh,
low_threshold=dag_conf.get('low_threshold', 2), low_threshold=low_thresh,
surge_multiplier=dag_conf.get('surge_multiplier', 1.2), surge_multiplier=surge_mult,
discount_multiplier=dag_conf.get('discount_multiplier', 0.9) discount_multiplier=discount_mult
) )
surge_pricer.fit(data) surge_pricer.fit(data)
data['optimal_price'] = surge_pricer.predict() data['optimal_price'] = surge_pricer.predict()
base_avg = data['base_price'].mean()
optimal_avg = data['optimal_price'].mean()
price_change_pct = ((optimal_avg - base_avg) / base_avg) * 100
logging.info(f"Price adjustment: base_avg={base_avg:.2f}, optimal_avg={optimal_avg:.2f}, change={price_change_pct:+.1f}%")
prices_df = data[['productId', 'price', 'base_price', 'optimal_price', 'demand']].rename(columns={ prices_df = data[['productId', 'price', 'base_price', 'optimal_price', 'demand']].rename(columns={
'price': 'current_price', 'price': 'current_price',
'demand': 'demand_score' 'demand': 'demand_score'

View File

@@ -1,11 +1,21 @@
from .evals import evaluate from .evals import evaluate
from .arch import ( from .arch import (
XGBoostAgentClassifier, XGBoostAgentClassifier,
LightGBMAgentClassifier LightGBMAgentClassifier,
ContrastiveWeakClassifier,
TrajectoryEncoder,
WeakClassifier,
contrastive_loss,
featurize_trajectory,
) )
__all__ =[ __all__ = [
'evaluate', 'evaluate',
'XGBoostAgentClassifier', 'XGBoostAgentClassifier',
'LightGBMAgentClassifier' 'LightGBMAgentClassifier',
'ContrastiveWeakClassifier',
'TrajectoryEncoder',
'WeakClassifier',
'contrastive_loss',
'featurize_trajectory',
] ]

View File

@@ -1,122 +1,212 @@
# sklearn compatible models for agent detection # sklearn compatible models for agent detection
from sklearn.base import BaseEstimator, ClassifierMixin from sklearn.base import BaseEstimator, ClassifierMixin
from procesing.context import PipelineContext from typing import Any, Optional, Tuple, Dict, List
from typing import Any, Optional, Tuple
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
import xgboost as xgb from collections import defaultdict
import lightgbm as lgb
import numpy as np import numpy as np
import pandas as pd import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import sys
from pathlib import Path
# add lib to path for imports
sys.path.insert(0, str(Path(__file__).parent.parent.parent / 'lib'))
from lib.features import (
transition_histogram as _lib_transition_histogram,
temporal_signature as _lib_temporal_signature,
state_coverage as _lib_state_coverage,
transition_entropy as _lib_transition_entropy,
featurize_trajectory as _lib_featurize_trajectory,
parse_timestamp
)
from lib.state import event_to_state, get_event_name, get_timestamp
TASK = 'classification' TASK = 'classification'
LABELS = ['human', 'agent'] LABELS = ['human', 'agent']
class BaseAgentClassifier(BaseEstimator, ClassifierMixin, ABC): class WeakClassifier(BaseEstimator, ClassifierMixin, ABC):
"""Base class for tree-based agent detection classifiers with common logic""" # a simple contrastive machine learning model learns to distinguish human/agent behavior
# using weakly supervised contrastive learning + augmentation
def __init__(self, **kwargs):
super().__init__()
self.model = None
self.kwargs = kwargs
def __init__(self, context: Optional[PipelineContext] = None, n_estimators: int = 200,
max_depth: int = 6, learning_rate: float = 0.05, class TrajectoryEncoder(nn.Module):
early_stopping_rounds: int = 20): """Encode variable-length event sequences to fixed-dim embedding via bidirectional LSTM"""
self.context = context def __init__(self, input_dim: int, embed_dim: int = 32, hidden_dim: int = 64):
super().__init__()
self.event_embed = nn.Linear(input_dim, hidden_dim)
self.lstm = nn.LSTM(hidden_dim, hidden_dim, batch_first=True, bidirectional=True)
self.proj = nn.Linear(hidden_dim * 2, embed_dim)
def forward(self, x: torch.Tensor) -> torch.Tensor:  # x: (batch, seq_len, input_dim)
    """Embed each event, run the LSTM and return an L2-normalised projection
    of the concatenated last two final hidden states."""
    embedded = F.relu(self.event_embed(x))
    _, (final_hidden, _) = self.lstm(embedded)
    # the last two entries of the final hidden state are the two directions
    second_to_last, last = final_hidden[-2], final_hidden[-1]
    merged = torch.cat([second_to_last, last], dim=1)
    projected = self.proj(merged)
    return F.normalize(projected, dim=1)  # L2 normalized
class ContrastiveWeakClassifier(WeakClassifier):
    """Contrastive learning classifier for human/agent trajectory discrimination.

    Combines a ``TrajectoryEncoder`` with a 2-way linear head. ``fit`` is only
    an sklearn-interface placeholder; the weights are trained elsewhere.
    """

    def __init__(self, input_dim: int = 64, embed_dim: int = 32, margin: float = 1.0, **kwargs):
        super().__init__(**kwargs)
        self.input_dim = input_dim
        self.embed_dim = embed_dim
        self.margin = margin
        self.encoder = TrajectoryEncoder(input_dim, embed_dim)
        self.classifier = nn.Linear(embed_dim, 2)
        device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.device = torch.device(device_name)
        self._fitted = False

    def to_device(self):
        """Move encoder and head to ``self.device``; returns self for chaining."""
        for module in (self.encoder, self.classifier):
            module.to(self.device)
        return self

    def encode(self, x: torch.Tensor) -> torch.Tensor:
        """Return the encoder embedding of ``x`` (moved to the model device first)."""
        on_device = x.to(self.device)
        return self.encoder(on_device)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the 2-class logits for ``x``."""
        return self.classifier(self.encode(x))

    def fit(self, X, y=None):  # sklearn interface - actual training in weak.train.py
        self._fitted = True
        return self

    def _eval_logits(self, X: np.ndarray) -> torch.Tensor:
        """Shared inference path: eval mode, no gradients, each row fed as a length-1 sequence."""
        self.encoder.eval()
        self.classifier.eval()
        with torch.no_grad():
            batch = torch.tensor(X, dtype=torch.float32).unsqueeze(1).to(self.device)
            return self.forward(batch)

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Return the arg-max class index for each row of ``X``."""
        logits = self._eval_logits(X)
        return torch.argmax(logits, dim=1).cpu().numpy()

    def predict_proba(self, X: np.ndarray) -> np.ndarray:
        """Return softmax class probabilities for each row of ``X``."""
        logits = self._eval_logits(X)
        return F.softmax(logits, dim=1).cpu().numpy()
def contrastive_loss(anchor: torch.Tensor, positive: torch.Tensor, negative: torch.Tensor, margin: float = 0.3) -> torch.Tensor:
    """Cosine-similarity triplet loss, averaged over the batch.

    A triplet is penalised unless the anchor is more similar to the positive
    than to the negative by at least ``margin``.
    """
    sim_to_negative = F.cosine_similarity(anchor, negative)
    sim_to_positive = F.cosine_similarity(anchor, positive)  # higher = more similar
    violation = sim_to_negative - sim_to_positive + margin
    return F.relu(violation).mean()
def nt_xent_loss(z_i: torch.Tensor, z_j: torch.Tensor, temperature: float = 0.5) -> torch.Tensor:
    """Normalized temperature-scaled cross entropy loss (SimCLR style).

    Row ``k`` of ``z_i`` and row ``k`` of ``z_j`` form a positive pair; every
    other row of the stacked batch acts as a negative. Self-similarities are
    masked out before the cross entropy.
    """
    n_pairs = z_i.size(0)
    stacked = torch.cat([z_i, z_j], dim=0)  # (2N, embed_dim)
    pairwise = F.cosine_similarity(stacked.unsqueeze(1), stacked.unsqueeze(0), dim=2)
    logits = pairwise / temperature
    diagonal = torch.eye(2 * n_pairs, dtype=torch.bool, device=stacked.device)
    logits = logits.masked_fill(diagonal, -float('inf'))
    first_half = torch.arange(n_pairs, device=stacked.device)
    # the positive for row k is row k+N, and vice versa
    targets = torch.cat([first_half + n_pairs, first_half])
    return F.cross_entropy(logits, targets)
# feature extraction utilities - delegating to lib.features for unified implementation
# these wrappers maintain backwards compatibility for existing imports
def transition_histogram(events: List, state_fn, max_states: int = 50) -> np.ndarray:
"""Compute normalized histogram of state transitions in trajectory.

Backwards-compatible wrapper: forwards all three arguments unchanged to
lib.features.transition_histogram and returns its result.
"""
return _lib_transition_histogram(events, state_fn, max_states)
def temporal_signature(events: List, ts_fn) -> np.ndarray:
"""Extract temporal features: mean/std/skew of inter-event times.

Backwards-compatible wrapper: forwards both arguments unchanged to
lib.features.temporal_signature and returns its result.
"""
return _lib_temporal_signature(events, ts_fn)
def state_coverage(events: List, state_fn, mdp_states: set) -> float:
"""Fraction of MDP states visited by trajectory.

Backwards-compatible wrapper: forwards all three arguments unchanged to
lib.features.state_coverage and returns its result.
"""
return _lib_state_coverage(events, state_fn, mdp_states)
def transition_entropy(events: List, state_fn) -> float:
"""Compute entropy of transition distribution (randomness of navigation).

Backwards-compatible wrapper: forwards both arguments unchanged to
lib.features.transition_entropy and returns its result.
"""
return _lib_transition_entropy(events, state_fn)
def featurize_trajectory(events: List, mdp: Optional[Dict] = None, input_dim: int = 64) -> np.ndarray:
    """Convert a trajectory to a fixed-dim feature vector via lib.features.

    ``mdp`` may supply a ``'states'`` collection; without it an empty state set
    is used. State, timestamp and event-name extraction are wired to the
    ``lib.state`` helpers before delegating.
    """
    if mdp:
        known_states = set(mdp.get('states', []))
    else:
        known_states = set()

    timestamp_of = lambda e: parse_timestamp(get_timestamp(e))
    event_name_of = lambda e: get_event_name(e)

    return _lib_featurize_trajectory(
        events, event_to_state, timestamp_of, event_name_of, known_states, input_dim
    )
# gradient boosting classifiers for comparison baselines
class XGBoostAgentClassifier(BaseEstimator, ClassifierMixin):
"""XGBoost classifier for human/agent detection from session features"""
def __init__(self, n_estimators: int = 100, max_depth: int = 6, learning_rate: float = 0.1, **kwargs):
self.n_estimators = n_estimators self.n_estimators = n_estimators
self.max_depth = max_depth self.max_depth = max_depth
self.learning_rate = learning_rate self.learning_rate = learning_rate
self.early_stopping_rounds = early_stopping_rounds self.model = None
self.model_ = None self.kwargs = kwargs
self.feature_names_ = None
def _to_array(self, X):
"""Convert pandas structures to numpy arrays"""
return X.values if isinstance(X, (pd.DataFrame, pd.Series)) else X
def _compute_pos_weight(self, y_arr):
"""Calculate scale_pos_weight for class imbalance handling"""
n_neg, n_pos = (y_arr == 0).sum(), (y_arr == 1).sum()
return n_neg / n_pos if n_pos > 0 else 1.0
def _prepare_eval_set(self, eval_set):
"""Convert eval_set to numpy arrays if needed"""
if not eval_set:
return None
X_val, y_val = eval_set[0]
return [(self._to_array(X_val), self._to_array(y_val))]
@abstractmethod
def _build_model(self, scale_pos: float):
"""Build the underlying model instance (must be implemented by subclasses)"""
pass
@abstractmethod
def _fit_with_eval(self, X_arr, y_arr, eval_arr):
"""Fit model with evaluation set (must be implemented by subclasses)"""
pass
def fit(self, X, y, eval_set=None):
X_arr, y_arr = self._to_array(X), self._to_array(y)
if isinstance(X, pd.DataFrame):
self.feature_names_ = X.columns.tolist()
scale_pos = self._compute_pos_weight(y_arr)
self.model_ = self._build_model(scale_pos)
eval_arr = self._prepare_eval_set(eval_set)
if eval_arr:
self._fit_with_eval(X_arr, y_arr, eval_arr)
else:
self.model_.fit(X_arr, y_arr)
def fit(self, X: np.ndarray, y: np.ndarray):
try:
import xgboost as xgb
self.model = xgb.XGBClassifier(n_estimators=self.n_estimators, max_depth=self.max_depth,
learning_rate=self.learning_rate, **self.kwargs)
self.model.fit(X, y)
except ImportError:
raise ImportError("xgboost required for XGBoostAgentClassifier")
return self return self
def predict(self, X): def predict(self, X: np.ndarray) -> np.ndarray:
return self.model_.predict(self._to_array(X)) if self.model is None:
raise ValueError("fit the model first")
return self.model.predict(X)
def predict_proba(self, X): def predict_proba(self, X: np.ndarray) -> np.ndarray:
return self.model_.predict_proba(self._to_array(X)) if self.model is None:
raise ValueError("fit the model first")
@property return self.model.predict_proba(X)
def feature_importances_(self):
return self.model_.feature_importances_ if self.model_ else None
class XGBoostAgentClassifier(BaseAgentClassifier): class LightGBMAgentClassifier(BaseEstimator, ClassifierMixin):
"""XGBoost binary classifier for agent detection with class imbalance handling""" """LightGBM classifier for human/agent detection from session features"""
def __init__(self, n_estimators: int = 100, max_depth: int = -1, learning_rate: float = 0.1, **kwargs):
self.n_estimators = n_estimators
self.max_depth = max_depth
self.learning_rate = learning_rate
self.model = None
self.kwargs = kwargs
def _build_model(self, scale_pos: float): def fit(self, X: np.ndarray, y: np.ndarray):
return xgb.XGBClassifier( try:
n_estimators=self.n_estimators, import lightgbm as lgb
max_depth=self.max_depth, self.model = lgb.LGBMClassifier(n_estimators=self.n_estimators, max_depth=self.max_depth,
learning_rate=self.learning_rate, learning_rate=self.learning_rate, verbose=-1, **self.kwargs)
scale_pos_weight=scale_pos, self.model.fit(X, y)
eval_metric='auc', except ImportError:
early_stopping_rounds=self.early_stopping_rounds, raise ImportError("lightgbm required for LightGBMAgentClassifier")
random_state=42, return self
tree_method='hist',
enable_categorical=False
)
def _fit_with_eval(self, X_arr, y_arr, eval_arr): def predict(self, X: np.ndarray) -> np.ndarray:
self.model_.fit(X_arr, y_arr, eval_set=eval_arr, verbose=False) if self.model is None:
raise ValueError("fit the model first")
return self.model.predict(X)
def predict_proba(self, X: np.ndarray) -> np.ndarray:
class LightGBMAgentClassifier(BaseAgentClassifier): if self.model is None:
"""LightGBM binary classifier for agent detection with class imbalance handling""" raise ValueError("fit the model first")
return self.model.predict_proba(X)
def _build_model(self, scale_pos: float):
return lgb.LGBMClassifier(
n_estimators=self.n_estimators,
max_depth=self.max_depth,
learning_rate=self.learning_rate,
scale_pos_weight=scale_pos,
metric='auc',
random_state=42,
verbosity=-1
)
def _fit_with_eval(self, X_arr, y_arr, eval_arr):
self.model_.fit(
X_arr, y_arr,
eval_set=eval_arr,
callbacks=[lgb.early_stopping(self.early_stopping_rounds, verbose=False)]
)

View File

@@ -0,0 +1,246 @@
import sys
sys.path.insert(0, "/home/velocitatem/Documents/Projects/PHANTOM/sim/rl/behavior_loader")
sys.path.insert(0, "/home/velocitatem/Documents/Projects/PHANTOM/experiments/ml")
from sim.rl.behavior_loader.loader import AgentLoader, Loader, JointLoader, PayloadModel
from sim.rl.behavior_loader.models import JointBehaviorModel
from arch import ContrastiveWeakClassifier, contrastive_loss, featurize_trajectory
from typing import List, Optional, Dict
from datetime import datetime, timedelta
from copy import deepcopy
import numpy as np
import random
import torch
from torch.utils.data import Dataset, DataLoader
from torch.optim import Adam
from torch.utils.tensorboard import SummaryWriter
# Root directory for TensorBoard logs; train() writes under "<RUNS_DIR>/train/"
# and evaluate_loocv() under "<RUNS_DIR>/eval/".
# NOTE(review): absolute, machine-specific path — consider making it configurable.
RUNS_DIR = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/ml/runs"
# Session data directories handed to JointLoader / JointBehaviorModel as
# (human_dir, agent_dir). Also absolute, machine-specific paths.
agent_dir = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/agents/collected_data/"
human_dir = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/collected_data/"
def _perturb_ts(evt: PayloadModel, jitter_ms: int = 500) -> PayloadModel:
    """Return a deep copy of ``evt`` with random jitter added to its timestamp.

    The timestamp is parsed as ISO-8601 (a trailing ``Z`` is treated as UTC)
    and shifted by a uniformly drawn integer number of milliseconds in
    ``[-jitter_ms, jitter_ms]``. This is best effort: when the timestamp cannot
    be parsed or reassigned, the copy is returned with its timestamp untouched.
    """
    new_evt = deepcopy(evt)
    try:
        ts = datetime.fromisoformat(evt.ts.replace('Z', '+00:00'))
        delta = timedelta(milliseconds=random.randint(-jitter_ms, jitter_ms))
        new_evt.ts = (ts + delta).isoformat()
    except Exception:
        # Deliberately tolerant of malformed/non-string timestamps, but no
        # longer a bare ``except`` that would also swallow KeyboardInterrupt
        # and SystemExit.
        pass
    return new_evt
def augment_trajectory(trajectory: List[PayloadModel], rate: float = 0.1) -> List[PayloadModel]:
    """Apply one randomly chosen augmentation to a trajectory.

    Trajectories shorter than two events are returned unchanged. Otherwise one
    of four strategies is picked uniformly:
      * window  - a random contiguous slice covering 70-100% of the events
      * shuffle - swap adjacent events, each position with probability ``rate``
      * noise   - jitter every timestamp by up to 500 ms
      * drop    - remove events with probability ``rate`` (falling back to the
                  first two events if fewer than two survive)
    """
    n_events = len(trajectory)
    if n_events < 2:
        return trajectory

    strategy = random.choice(['window', 'shuffle', 'noise', 'drop'])

    if strategy == 'window':
        shortest = max(2, int(n_events * 0.7))
        width = random.randint(shortest, n_events)
        offset = random.randint(0, n_events - width)
        return trajectory[offset:offset + width]

    if strategy == 'shuffle':
        swapped = list(trajectory)
        for pos in range(len(swapped) - 1):
            if random.random() < rate:
                swapped[pos], swapped[pos + 1] = swapped[pos + 1], swapped[pos]
        return swapped

    if strategy == 'drop':
        survivors = [evt for evt in trajectory if random.random() > rate]
        if len(survivors) >= 2:
            return survivors
        return trajectory[:2]

    if strategy == 'noise':
        return [_perturb_ts(evt, jitter_ms=500) for evt in trajectory]

    return trajectory
class TripletDataset(Dataset):
    """Generate (anchor, positive, negative) triplets on-the-fly with augmentation.

    Sessions are labelled by their id prefix: ``human_`` or ``agent_``. Each
    item pairs an anchor session with an augmented view of itself (positive)
    and a randomly drawn session of the opposite class (negative).
    """

    def __init__(self, data: Dict[str, List[PayloadModel]], mdp: Optional[Dict], augment_fn, input_dim: int = 64, multiplier: int = 10):
        self.sessions = list(data.items())
        self.human_ids = []
        self.agent_ids = []
        for position, (session_id, _) in enumerate(self.sessions):
            if session_id.startswith('human_'):
                self.human_ids.append(position)
            if session_id.startswith('agent_'):
                self.agent_ids.append(position)
        self.mdp = mdp
        self.augment = augment_fn
        self.input_dim = input_dim
        self.multiplier = multiplier
        if not self.human_ids or not self.agent_ids:
            raise ValueError(f"Need both human ({len(self.human_ids)}) and agent ({len(self.agent_ids)}) sessions")

    def __len__(self) -> int:
        # every session is revisited ``multiplier`` times per epoch
        return self.multiplier * len(self.sessions)

    def __getitem__(self, idx: int):
        session_id, events = self.sessions[idx % len(self.sessions)]
        anchor_is_human = session_id.startswith('human_')

        anchor = featurize_trajectory(events, self.mdp, self.input_dim)
        positive = featurize_trajectory(self.augment(events), self.mdp, self.input_dim)

        # the negative comes from the opposite class
        opposite_pool = self.agent_ids if anchor_is_human else self.human_ids
        _, negative_events = self.sessions[random.choice(opposite_pool)]
        negative = featurize_trajectory(negative_events, self.mdp, self.input_dim)

        label = 0 if anchor_is_human else 1  # 0=human, 1=agent
        features = [torch.tensor(vec, dtype=torch.float32) for vec in (anchor, positive, negative)]
        return (*features, torch.tensor(label, dtype=torch.long))
def train(epochs: int = 100, lr: float = 1e-3, batch_size: int = 4, input_dim: int = 64,
          embed_dim: int = 32, margin: float = 0.3, verbose: bool = True, run_name: str = None):
    """Train contrastive weak classifier on human/agent trajectories.

    Each batch optimises the triplet loss on the encoder embeddings plus
    0.5 x cross-entropy of the classifier head on the anchor embedding.

    Args:
        epochs: number of passes over the triplet dataset.
        lr: Adam learning rate.
        batch_size: triplets per batch (incomplete final batches are dropped).
        input_dim: length of the trajectory feature vector.
        embed_dim: encoder embedding size.
        margin: triplet-loss margin.
        verbose: print progress every 10 epochs and a final summary.
        run_name: TensorBoard run name; generated from the hyper-parameters
            and the current time when omitted.

    Returns:
        ``(model, ref_mdp)``: the trained classifier and the reference MDP
        used for featurisation.
    """
    joint = JointLoader(human_dir, agent_dir)
    data = joint.get_data()
    if verbose:
        print(f"Loaded {len(data)} sessions")
    joint_model = JointBehaviorModel(human_dir, agent_dir)
    ref_mdp = joint_model.build_MDP()
    dataset = TripletDataset(data, ref_mdp, augment_trajectory, input_dim=input_dim)
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, drop_last=True)
    model = ContrastiveWeakClassifier(input_dim=input_dim, embed_dim=embed_dim, margin=margin)
    model.to_device()
    run_name = run_name or f"d{input_dim}_e{embed_dim}_lr{lr}_m{margin}_{datetime.now():%Y%m%d_%H%M%S}"
    writer = SummaryWriter(f"{RUNS_DIR}/train/{run_name}")
    optimizer = Adam(list(model.encoder.parameters()) + list(model.classifier.parameters()), lr=lr)
    ce_loss_fn = torch.nn.CrossEntropyLoss()
    best_loss = float('inf')
    try:
        for epoch in range(epochs):
            model.encoder.train()
            model.classifier.train()
            total_loss, n_batches = 0.0, 0
            for anchor, positive, negative, labels in loader:
                anchor, positive, negative, labels = [t.to(model.device) for t in [anchor, positive, negative, labels]]
                # each feature vector is fed to the encoder as a length-1 sequence
                z_a, z_p, z_n = [model.encoder(t.unsqueeze(1)) for t in [anchor, positive, negative]]
                trip_loss = contrastive_loss(z_a, z_p, z_n, margin=model.margin)
                ce = ce_loss_fn(model.classifier(z_a), labels)
                loss = trip_loss + 0.5 * ce
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                total_loss += loss.item()
                n_batches += 1
            avg_loss = total_loss / max(n_batches, 1)
            writer.add_scalar('loss', avg_loss, epoch)
            if verbose and (epoch + 1) % 10 == 0:
                print(f"Epoch {epoch+1}/{epochs}: loss={avg_loss:.4f}")
            best_loss = min(best_loss, avg_loss)
    finally:
        # flush and release the TensorBoard writer even when training raises
        writer.close()
    if verbose:
        print(f"Done. Best={best_loss:.4f} TB:{RUNS_DIR}/train/{run_name}")
    return model, ref_mdp
def evaluate_loocv(input_dim: int = 64, embed_dim: int = 32, epochs_per_fold: int = 50,
                   lr: float = 1e-3, margin: float = 0.3, run_name: str = None):
    """Leave-one-out cross-validation given limited samples.

    For every session a fresh model is trained on all other sessions and used
    to classify the held-out one. Folds whose training split lacks one of the
    two classes are skipped; a fold that raises is reported and skipped.

    The per-fold objective matches ``train``: triplet loss plus
    0.5 x cross-entropy on the anchor embedding. The cross-entropy term is
    required because ``model.predict`` goes through the classifier head, which
    would otherwise stay at its random initialisation.

    Returns:
        ``(accuracy, predictions, actuals)``; ``(0.0, [], [])`` when no fold
        produced a prediction.
    """
    joint = JointLoader(human_dir, agent_dir)
    data = joint.get_data()
    session_ids = list(data.keys())
    # NOTE(review): the reference MDP is built once from both data directories
    # before the fold loop, so it presumably also covers the held-out
    # session — confirm whether that leakage is acceptable.
    joint_model = JointBehaviorModel(human_dir, agent_dir)
    ref_mdp = joint_model.build_MDP()
    run_name = run_name or f"loocv_d{input_dim}_e{embed_dim}_m{margin}_{datetime.now():%Y%m%d_%H%M%S}"
    writer = SummaryWriter(f"{RUNS_DIR}/eval/{run_name}")
    ce_loss_fn = torch.nn.CrossEntropyLoss()
    predictions, actuals = [], []
    try:
        for fold_idx, test_sid in enumerate(session_ids):
            train_data = {k: v for k, v in data.items() if k != test_sid}
            test_events = data[test_sid]
            test_label = 0 if test_sid.startswith('human_') else 1
            n_human = sum(1 for k in train_data if k.startswith('human_'))
            n_agent = sum(1 for k in train_data if k.startswith('agent_'))
            if n_human == 0 or n_agent == 0:
                continue
            try:
                dataset = TripletDataset(train_data, ref_mdp, augment_trajectory, input_dim=input_dim, multiplier=5)
                loader = DataLoader(dataset, batch_size=2, shuffle=True, drop_last=True)
                model = ContrastiveWeakClassifier(input_dim=input_dim, embed_dim=embed_dim, margin=margin)
                model.to_device()
                optimizer = Adam(list(model.encoder.parameters()) + list(model.classifier.parameters()), lr=lr)
                model.encoder.train()
                model.classifier.train()
                for _ in range(epochs_per_fold):
                    for anchor, positive, negative, labels in loader:
                        z_a, z_p, z_n = [model.encoder(t.unsqueeze(1).to(model.device)) for t in [anchor, positive, negative]]
                        trip_loss = contrastive_loss(z_a, z_p, z_n, margin=margin)
                        # supervise the head so predict() is meaningful
                        ce = ce_loss_fn(model.classifier(z_a), labels.to(model.device))
                        loss = trip_loss + 0.5 * ce
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()
                test_feat = featurize_trajectory(test_events, ref_mdp, input_dim)
                pred = model.predict(test_feat.reshape(1, -1))[0]
                predictions.append(pred)
                actuals.append(test_label)
                print(f"  {test_sid[:12]}...: pred={pred}, actual={test_label}, {'OK' if pred == test_label else 'MISS'}")
            except Exception as e:
                print(f"Error: {e}")
        if predictions:
            acc = sum(p == a for p, a in zip(predictions, actuals)) / len(predictions)
            # class 1 (agent) is treated as the positive class
            tp = sum(1 for p, a in zip(predictions, actuals) if p == 1 and a == 1)
            fp = sum(1 for p, a in zip(predictions, actuals) if p == 1 and a == 0)
            fn = sum(1 for p, a in zip(predictions, actuals) if p == 0 and a == 1)
            prec, rec = tp / max(tp + fp, 1), tp / max(tp + fn, 1)
            f1 = 2 * prec * rec / max(prec + rec, 1e-10)
            writer.add_scalar('accuracy', acc, 0)
            writer.add_scalar('f1', f1, 0)
            writer.add_scalar('precision', prec, 0)
            writer.add_scalar('recall', rec, 0)
            print(f"\nAccuracy: {acc:.2%} F1: {f1:.3f} TB:{RUNS_DIR}/eval/{run_name}")
            return acc, predictions, actuals
        return 0.0, [], []
    finally:
        # close the TensorBoard writer on every exit path
        writer.close()
if __name__ == "__main__":
    # Command-line entry point: ``--mode train`` fits one model,
    # ``--mode eval`` runs leave-one-out cross-validation.
    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument('--mode', choices=['train', 'eval'], default='train')
    cli.add_argument('--epochs', type=int, default=100)
    cli.add_argument('--lr', type=float, default=1e-3)
    cli.add_argument('--margin', type=float, default=0.3)
    cli.add_argument('--input-dim', type=int, default=64)
    cli.add_argument('--embed-dim', type=int, default=32)
    cli.add_argument('--run-name', type=str, default=None)
    opts = cli.parse_args()

    if opts.mode != 'train':
        # in eval mode --epochs is the number of epochs per fold
        evaluate_loocv(
            input_dim=opts.input_dim,
            embed_dim=opts.embed_dim,
            epochs_per_fold=opts.epochs,
            lr=opts.lr,
            margin=opts.margin,
            run_name=opts.run_name,
        )
    else:
        model, mdp = train(
            epochs=opts.epochs,
            lr=opts.lr,
            input_dim=opts.input_dim,
            embed_dim=opts.embed_dim,
            margin=opts.margin,
            run_name=opts.run_name,
        )

View File

@@ -0,0 +1,114 @@
from __future__ import annotations
import os
import random
from pathlib import Path
from types import SimpleNamespace
import pandas as pd
from lib.separability import estimate_alpha, load_artifacts, score_session
# use relative import when in package context, fallback for standalone
try:
from sim.rl.behavior_loader.models import AgentBehaviorModel
except ImportError:
import sys
sys.path.insert(0, str(Path(__file__).parent.parent.parent / "sim" / "rl" / "behavior_loader"))
from models import AgentBehaviorModel
# paths should be configurable via environment or relative to project root
# PROJECT_ROOT is three directory levels above this file.
PROJECT_ROOT = Path(__file__).parent.parent.parent
# Agent data location: the PHANTOM_AGENT_DATA_DIR environment variable wins,
# otherwise <PROJECT_ROOT>/experiments/agents/collected_data.
AGENT_DATA_DIR = Path(os.getenv('PHANTOM_AGENT_DATA_DIR', PROJECT_ROOT / "experiments" / "agents" / "collected_data"))
# Separability artifacts are optional: when load_artifacts() cannot find them
# the module still imports and SEPARABILITY_ARTIFACTS is None.
try:
SEPARABILITY_ARTIFACTS = load_artifacts()
except FileNotFoundError:
SEPARABILITY_ARTIFACTS = None
def remap_schema(df: pd.DataFrame, mapping: dict, on: str = "event_type") -> pd.DataFrame:
    """Return a copy of ``df`` whose ``on`` column is translated through ``mapping``.

    Values without an entry in ``mapping`` keep their original value. The
    input frame is not modified.
    """
    result = df.copy()
    original_values = result[on]
    translated = original_values.map(mapping)
    result[on] = translated.fillna(original_values)
    return result
def _states_to_events(states: list[str]) -> list[SimpleNamespace]:
events: list[SimpleNamespace] = []
for idx, state in enumerate(states):
parts = state.split("|") if isinstance(state, str) else ["page", "product", str(state)]
page = f"/{parts[0]}" if parts else "/"
product = parts[1] if len(parts) > 1 else "unknown"
event_name = parts[2] if len(parts) > 2 else parts[-1]
events.append(
SimpleNamespace(
eventName=event_name,
page=page,
productId=product,
ts=float(idx),
)
)
return events
def contaminate_dataset(df: pd.DataFrame, on: str = "event_type",
                        contamination_rate: float = 0.1,
                        agent_data_dir: Path = None) -> pd.DataFrame:
    """Inject synthetic agent trajectories into a dataset.

    Args:
        df: original (human) event data; must contain column ``on``.
        on: name of the event-type column.
        contamination_rate: fraction of the FINAL dataset that should be agent
            data (0.1 = 10% agents). Must satisfy ``0 <= rate < 1``.
        agent_data_dir: directory handed to ``AgentBehaviorModel``; defaults to
            the module-level ``AGENT_DATA_DIR``.

    Returns:
        ``df`` itself when nothing is added, otherwise a new frame with the
        synthetic rows appended (``source == 'synthetic_agent'``). When
        separability artifacts are loaded, synthetic rows carry the trajectory
        scores and the frame gets an ``estimated_alpha`` column holding the mean
        alpha estimate.

    Raises:
        ValueError: if ``contamination_rate`` is outside ``[0, 1)``.
    """
    if not 0.0 <= contamination_rate < 1.0:
        raise ValueError(f"contamination_rate must be in [0, 1), got {contamination_rate!r}")

    # a final size of N / (1 - rate) makes the synthetic share equal to rate
    n_original = len(df)
    row_budget = int(n_original / (1 - contamination_rate) - n_original)
    if row_budget <= 0:
        return df  # nothing to inject; avoids loading the agent model needlessly

    data_dir = agent_data_dir or AGENT_DATA_DIR
    model = AgentBehaviorModel(str(data_dir))
    model.build_MDP()  # ensure MDP is built before sampling
    mdp_states = model.mdp.get('states', []) if model.mdp else []

    # start events are drawn from the empirical event distribution of the data
    event_dist = df[on].value_counts(normalize=True).to_dict()
    if not event_dist:
        return df
    start_events = random.choices(list(event_dist.keys()), weights=list(event_dist.values()), k=row_budget)

    new_rows = []
    alpha_estimates = []
    starts_by_event = {}  # cache: event type -> MDP states containing it
    for start_event in start_events:
        # each trajectory adds several rows, so stop once the row budget is
        # spent; otherwise the synthetic share overshoots contamination_rate
        remaining = row_budget - len(new_rows)
        if remaining <= 0:
            break
        if start_event not in starts_by_event:
            starts_by_event[start_event] = [s for s in mdp_states if start_event in s]
        matching_starts = starts_by_event[start_event]
        if not matching_starts:
            continue  # skip if no matching start state
        start_state = random.choice(matching_starts)
        trajectory = list(model.sample_traj(start_state, max_len=20))

        score = {}
        if SEPARABILITY_ARTIFACTS and trajectory:
            score = score_session(_states_to_events(trajectory), SEPARABILITY_ARTIFACTS)
            alpha_estimates.append(
                estimate_alpha(score["prob_agent"], score["delta_h"], score["delta_a"], temperature=2.0)
            )

        for state in trajectory[:remaining]:
            # the event type is the last "|"-separated field of the state
            event_value = state.split('|')[-1] if isinstance(state, str) else start_event
            new_rows.append({
                on: event_value,
                'source': 'synthetic_agent',
                'prob_agent': score.get('prob_agent'),
                'delta_h': score.get('delta_h'),
                'delta_a': score.get('delta_a'),
            })

    if new_rows:
        df = pd.concat([df, pd.DataFrame(new_rows)], ignore_index=True)
        if alpha_estimates:
            df['estimated_alpha'] = sum(alpha_estimates) / len(alpha_estimates)
    return df

View File

@@ -7,15 +7,6 @@ import pandas as pd
class PricingFunction(ABC): class PricingFunction(ABC):
""" """
Abstract base for pricing functions. Abstract base for pricing functions.
Defines mapping: f(Q_t, P_t, S_t, H_t) -> P_{t+1}
Where:
Q_t ∈ R^n: demand vector at time t
P_t ∈ R^n: price vector at time t
S_t: session features (behavioral signals, interactions)
H_t = {Q_{t-k}, P_{t-k}, S_{t-k}}: historical state trajectory
Objective: Objective:
maximize E[R_T] = E[Σ P_t^T · Q_t] maximize E[R_T] = E[Σ P_t^T · Q_t]
subject to: subject to:
@@ -28,10 +19,10 @@ class PricingFunction(ABC):
def fit(self, *kwargs): def fit(self, *kwargs):
""" """
Offline training on historical data. Offline training on historical data.
This is where we can think about some maximization of expected revenue
over historical trajectories to learn parameters of the pricing function.
(However, we cover this in more depth on the RL side of things.)
Args:
historical_data: DataFrame with elasticity, prices, demand signals
**kwargs: additional training parameters
""" """
pass pass
@@ -39,12 +30,18 @@ class PricingFunction(ABC):
def predict(self, *kwargs) -> np.ndarray: def predict(self, *kwargs) -> np.ndarray:
""" """
Generate optimal prices given current state. Generate optimal prices given current state.
This is an abstract method that transitions from τ -> P*
which is the mapping from the trajectory to optimal prices under
some subset of session grouping (so, per sessionId)
"""
pass
Args: @abstractmethod
state_space: StateSpace object containing Q_t, P_t, S_t, H_t def _get_features(self, *kwargs) -> np.ndarray:
"""
Extract features from trajectory for pricing decision.
Returns: Returns:
P_{t+1}: price vector in R^n np.ndarray of shape (n_products, n_features)
""" """
pass pass

View File

@@ -57,3 +57,13 @@ class ElasticityBasedPricer(PricingFunction):
# enforce bounds # enforce bounds
prices = np.clip(prices, self.price_floor, self.price_ceil) prices = np.clip(prices, self.price_floor, self.price_ceil)
return prices return prices
def _get_features(self, state_space=None) -> np.ndarray:
"""Extract elasticity, demand, and demand deviation for each product"""
if state_space is None or self.elasticity is None:
n = len(self.elasticity) if self.elasticity is not None else 0
return np.zeros((n, 3))
demand = np.asarray(state_space.demand)
demand_dev = (demand - self.mean_demand) / (self.mean_demand + 1e-6)
return np.column_stack([self.elasticity, demand, demand_dev])

View File

@@ -107,6 +107,36 @@ class SessionAwarePricer(PricingFunction):
return prices return prices
def _get_features(self, state_space=None) -> np.ndarray:
"""Extract elasticity, demand, and session features"""
if state_space is None or self.elasticity is None:
n = len(self.elasticity) if self.elasticity is not None else 0
return np.zeros((n, 5))
demand = np.asarray(state_space.demand)
n_products = len(demand)
# extract session features
velocity = 0.0
view_depth = 0.0
cart_to_view = 0.0
if not state_space.session_features.empty:
sf = state_space.session_features.iloc[0]
velocity = sf.get('interaction_velocity', 0.0)
view_depth = sf.get('product_view_depth', 0.0)
cart_to_view = sf.get('cart_to_view_ratio', 0.0)
# broadcast session features to all products
features = np.column_stack([
self.elasticity,
demand,
np.full(n_products, velocity),
np.full(n_products, view_depth),
np.full(n_products, cart_to_view)
])
return features
class ProductSpecificSessionPricer(PricingFunction): class ProductSpecificSessionPricer(PricingFunction):
""" """
@@ -170,3 +200,12 @@ class ProductSpecificSessionPricer(PricingFunction):
prices = np.clip(base_prices, self.price_floor, self.price_ceil) prices = np.clip(base_prices, self.price_floor, self.price_ceil)
return prices return prices
def _get_features(self, state_space=None) -> np.ndarray:
"""Extract elasticity and demand features for product-specific pricing"""
if state_space is None or self.elasticity is None:
n = len(self.elasticity) if self.elasticity is not None else 0
return np.zeros((n, 2))
demand = np.asarray(state_space.demand)
return np.column_stack([self.elasticity, demand])

View File

@@ -3,6 +3,46 @@ import pandas as pd
from procesing.pricers.base import PricingFunction from procesing.pricers.base import PricingFunction
def session_features_to_demand(session_features: pd.DataFrame) -> float:
    """
    Map session behavioral features to a demand proxy (the theta-hat -> D step
    used by rule-based pricing).

    Only the first row of ``session_features`` is read. Starting from a
    baseline of 1.0 the score is raised by:
      - +10 when ``interaction_velocity`` > 2.0 (treated as an agent signal)
      - +5 when ``cart_to_view_ratio`` > 0.1 or ``cart_adds`` > 0 (purchase intent)
      - +min(item_views, 5) when ``item_views`` > 3 (browsing depth)

    Returns: demand proxy in the 1-20 range (1.0 for an empty frame, capped at 20).
    """
    if session_features.empty:
        return 1.0

    row = session_features.iloc[0]
    velocity = row.get('interaction_velocity', 0)
    cart_ratio = row.get('cart_to_view_ratio', 0)
    item_views = row.get('item_views', 0)
    cart_adds = row.get('cart_adds', 0)

    looks_like_agent = velocity > 2.0
    shows_cart_intent = cart_ratio > 0.1 or cart_adds > 0
    browses_deeply = item_views > 3

    demand = 1.0  # baseline
    if looks_like_agent:
        demand += 10.0
    if shows_cart_intent:
        demand += 5.0
    if browses_deeply:
        demand += min(item_views, 5.0)

    return min(demand, 20.0)
class StaticPricer(PricingFunction): class StaticPricer(PricingFunction):
"""Static pricing: always return fixed base prices""" """Static pricing: always return fixed base prices"""
@@ -25,6 +65,11 @@ class StaticPricer(PricingFunction):
raise ValueError("Must call fit() or provide base_prices in constructor") raise ValueError("Must call fit() or provide base_prices in constructor")
return self.base_prices.copy() return self.base_prices.copy()
def _get_features(self, state_space=None) -> np.ndarray:
"""Static pricer uses no features, returns empty array"""
n = len(self.base_prices) if self.base_prices is not None else 0
return np.zeros((n, 0))
class RandomPricer(PricingFunction): class RandomPricer(PricingFunction):
"""Random pricing within bounds (for baseline comparison)""" """Random pricing within bounds (for baseline comparison)"""
@@ -47,6 +92,11 @@ class RandomPricer(PricingFunction):
self.n_products = len(state_space.demand) self.n_products = len(state_space.demand)
return self.rng.uniform(self.price_min, self.price_max, size=self.n_products) return self.rng.uniform(self.price_min, self.price_max, size=self.n_products)
def _get_features(self, state_space=None) -> np.ndarray:
"""Random pricer uses no features"""
n = self.n_products if self.n_products else 0
return np.zeros((n, 0))
class SimpleSurgePricer(PricingFunction): class SimpleSurgePricer(PricingFunction):
""" """
@@ -67,21 +117,25 @@ class SimpleSurgePricer(PricingFunction):
self.surge_multiplier = surge_multiplier self.surge_multiplier = surge_multiplier
self.discount_multiplier = discount_multiplier self.discount_multiplier = discount_multiplier
def fit(self, market_data : pd.DataFrame): def fit(self, market_data: pd.DataFrame):
"""Extract base prices from product catalog or historical averages""" """Extract base prices from product catalog or historical averages"""
self.base_prices = market_data['base_price'].to_numpy() if 'base_price' in market_data.columns else market_data['price'].values self.base_prices = market_data['base_price'].to_numpy() if 'base_price' in market_data.columns else market_data['price'].values
self.demand_history = market_data['demand'].to_numpy() if 'demand' in market_data.columns else np.zeros_like(self.base_prices) return self
def predict(self) -> np.ndarray: def predict(self, state_space) -> np.ndarray:
""" """
Adjust prices based on current demand using surge rules. Adjust prices based on current demand using surge rules.
state_space.demand: demand counts per product state_space.demand: demand proxy per product (from session features)
state_space.prices: current prices (fallback if base_prices not set) state_space.prices: base prices
""" """
current_prices = self.base_prices if self.base_prices is not None else np.ones_like(demand_vector) * 99.99 demand = np.asarray(state_space.demand) if state_space and hasattr(state_space, 'demand') else np.array([0])
demand = self.demand_history if self.demand_history is not None else np.zeros_like(current_prices) base = np.asarray(state_space.prices) if state_space and hasattr(state_space, 'prices') else self.base_prices
new_prices = current_prices.copy()
if base is None:
base = np.ones(len(demand)) * 99.99
# ensure float dtype to allow multiplication by float multipliers
new_prices = base.astype(np.float64).copy()
high_mask = demand >= self.high_threshold high_mask = demand >= self.high_threshold
new_prices[high_mask] *= self.surge_multiplier new_prices[high_mask] *= self.surge_multiplier
@@ -89,3 +143,16 @@ class SimpleSurgePricer(PricingFunction):
new_prices[low_mask] *= self.discount_multiplier new_prices[low_mask] *= self.discount_multiplier
return new_prices return new_prices
def _get_features(self, state_space=None) -> np.ndarray:
"""Extract demand and base price features for each product"""
if state_space is None:
n = len(self.base_prices) if self.base_prices is not None else 0
return np.zeros((n, 2))
demand = np.asarray(state_space.demand) if hasattr(state_space, 'demand') else np.array([0])
base = np.asarray(state_space.prices) if hasattr(state_space, 'prices') else self.base_prices
if base is None:
base = np.ones(len(demand)) * 99.99
return np.column_stack([demand, base])

View File

@@ -135,6 +135,7 @@ class ExtractSessionFeaturesStep(BaseContextStep):
Vectorized session feature extraction - replaces O(n^2) per-row loop. Vectorized session feature extraction - replaces O(n^2) per-row loop.
Input: interactions_df Input: interactions_df
Output: session-level feature matrix Output: session-level feature matrix
THIS is our main mapping from tau (trajectory) to a feature vector theta - we need to do this very well. This is what will go into demand estimation.
""" """
def transform(self, X: pd.DataFrame) -> pd.DataFrame: def transform(self, X: pd.DataFrame) -> pd.DataFrame:

View File

@@ -6,6 +6,7 @@ from procesing.steps import (
) )
def test_compute_demand(pipeline_context): def test_compute_demand(pipeline_context):
random.seed(42) # deterministic test
step = ComputeDemandStep(context=pipeline_context) step = ComputeDemandStep(context=pipeline_context)
# Test with normal interaction data # Test with normal interaction data
@@ -26,6 +27,7 @@ def test_compute_demand(pipeline_context):
def test_compute_demand_skewed(pipeline_context): def test_compute_demand_skewed(pipeline_context):
random.seed(42) # deterministic test
step = ComputeDemandStep(context=pipeline_context) step = ComputeDemandStep(context=pipeline_context)
# Test with normal interaction data # Test with normal interaction data

41
lib/__init__.py Normal file
View File

@@ -0,0 +1,41 @@
"""PHANTOM shared library
Exports unified utilities for features, state, config, kafka, and model registry
"""
from .config import (
PROJECT_ROOT, DATA_DIR, EXPERIMENTS_DIR,
AGENT_DATA_DIR, HUMAN_DATA_DIR, SIM_RUNS_DIR, MODEL_REGISTRY_DIR,
COLLECTED_DATA_DIR, NOTEBOOK_OUTPUT_DIR,
ensure_dir, get_data_path, get_experiments_path, get_sim_path,
KAFKA_HOST, KAFKA_PORT, KAFKA_BROKER,
REDIS_HOST, REDIS_PORT,
SUPABASE_URL, SUPABASE_ANON_KEY,
BACKEND_PORT, PROVIDER_PORT
)
from .state import (
make_state_repr, event_to_state, parse_state,
get_event_name, get_timestamp,
create_state_fn, create_event_name_fn, create_timestamp_fn
)
from .features import (
transition_histogram, temporal_signature, state_coverage, transition_entropy,
event_type_distribution, featurize_trajectory, parse_timestamp
)
__all__ = [
# config
'PROJECT_ROOT', 'DATA_DIR', 'EXPERIMENTS_DIR',
'AGENT_DATA_DIR', 'HUMAN_DATA_DIR', 'SIM_RUNS_DIR', 'MODEL_REGISTRY_DIR',
'COLLECTED_DATA_DIR', 'NOTEBOOK_OUTPUT_DIR',
'ensure_dir', 'get_data_path', 'get_experiments_path', 'get_sim_path',
'KAFKA_HOST', 'KAFKA_PORT', 'KAFKA_BROKER',
'REDIS_HOST', 'REDIS_PORT',
'SUPABASE_URL', 'SUPABASE_ANON_KEY',
'BACKEND_PORT', 'PROVIDER_PORT',
# state
'make_state_repr', 'event_to_state', 'parse_state',
'get_event_name', 'get_timestamp',
'create_state_fn', 'create_event_name_fn', 'create_timestamp_fn',
# features
'transition_histogram', 'temporal_signature', 'state_coverage', 'transition_entropy',
'event_type_distribution', 'featurize_trajectory', 'parse_timestamp',
]

65
lib/config.py Normal file
View File

@@ -0,0 +1,65 @@
"""Unified path configuration for PHANTOM project
All hardcoded paths should reference this module
Paths can be overridden via environment variables
"""
import os
from pathlib import Path
# All paths below are computed once, when this module is imported; changing the
# environment afterwards has no effect on them.
# project root (directory containing lib/, experiments/, sim/, web/, backend/)
PROJECT_ROOT = Path(__file__).parent.parent.resolve()
# data directories
# (the defaults of several paths below derive from these two, so overriding
# PHANTOM_DATA_DIR / PHANTOM_EXPERIMENTS_DIR also moves those derived defaults)
DATA_DIR = Path(os.getenv('PHANTOM_DATA_DIR', PROJECT_ROOT / 'data'))
EXPERIMENTS_DIR = Path(os.getenv('PHANTOM_EXPERIMENTS_DIR', PROJECT_ROOT / 'experiments'))
# agent/human interaction data (default: <DATA_DIR>/agents and <DATA_DIR>/humans)
AGENT_DATA_DIR = Path(os.getenv('PHANTOM_AGENT_DATA_DIR', DATA_DIR / 'agents'))
HUMAN_DATA_DIR = Path(os.getenv('PHANTOM_HUMAN_DATA_DIR', DATA_DIR / 'humans'))
# RL simulation runs (default: <PROJECT_ROOT>/sim/rl/runs)
SIM_RUNS_DIR = Path(os.getenv('PHANTOM_SIM_RUNS_DIR', PROJECT_ROOT / 'sim' / 'rl' / 'runs'))
# model artifacts (default: <DATA_DIR>/models)
MODEL_REGISTRY_DIR = Path(os.getenv('PHANTOM_MODEL_REGISTRY_DIR', DATA_DIR / 'models'))
# collected experiment data (default: <EXPERIMENTS_DIR>/agents/collected_data)
COLLECTED_DATA_DIR = Path(os.getenv('PHANTOM_COLLECTED_DATA_DIR', EXPERIMENTS_DIR / 'agents' / 'collected_data'))
# notebook outputs (default: <EXPERIMENTS_DIR>/notebooks/outputs)
NOTEBOOK_OUTPUT_DIR = Path(os.getenv('PHANTOM_NOTEBOOK_OUTPUT_DIR', EXPERIMENTS_DIR / 'notebooks' / 'outputs'))
def ensure_dir(path: Path) -> Path:
    """Create ``path`` together with any missing parents and hand it back.

    An already existing directory is not an error, so the call can be chained:
    ``out = ensure_dir(base / "plots")``.
    """
    path.mkdir(exist_ok=True, parents=True)
    return path
def get_data_path(*parts: str) -> Path:
    """Join ``parts`` onto DATA_DIR and return the resulting path (nothing is created)."""
    return Path(DATA_DIR, *parts)
def get_experiments_path(*parts: str) -> Path:
    """Join ``parts`` onto EXPERIMENTS_DIR and return the resulting path (nothing is created)."""
    return Path(EXPERIMENTS_DIR, *parts)
def get_sim_path(*parts: str) -> Path:
    """Join ``parts`` onto SIM_RUNS_DIR and return the resulting path (nothing is created)."""
    return Path(SIM_RUNS_DIR, *parts)
# service configuration (from .env)
# Values are read from the process environment at import time.
# NOTE(review): this module does not load a .env file itself - presumably the
# caller or the shell does; confirm.
KAFKA_HOST = os.getenv('KAFKA_HOST', 'localhost')
# kept as a string (unlike the int ports below); it is only interpolated into KAFKA_BROKER here
KAFKA_PORT = os.getenv('KAFKA_PORT', '9092')
KAFKA_BROKER = f"{KAFKA_HOST}:{KAFKA_PORT}"
REDIS_HOST = os.getenv('REDIS_HOST', 'localhost')
# int() raises ValueError at import time if the variable is set to a non-numeric value
REDIS_PORT = int(os.getenv('REDIS_PORT', '6379'))
# Supabase settings default to empty strings when unset
SUPABASE_URL = os.getenv('NEXT_PUBLIC_SUPABASE_URL', '')
SUPABASE_ANON_KEY = os.getenv('NEXT_PUBLIC_SUPABASE_ANON_KEY', '')
BACKEND_PORT = int(os.getenv('BACKEND_PORT', '5000'))
PROVIDER_PORT = int(os.getenv('PROVIDER_PORT', '5001'))

125
lib/features.py Normal file
View File

@@ -0,0 +1,125 @@
"""Unified featurization utilities for trajectory -> feature vector conversion
Used by both experiments/ml/ and sim/rl/ components
"""
import numpy as np
from collections import defaultdict
from typing import List, Dict, Callable, Optional, Any, Set
from datetime import datetime
def transition_histogram(events: List, state_fn: Callable, max_states: int = 50) -> np.ndarray:
    """normalized counts of consecutive state transitions, as a fixed-length vector

    events: list of event objects/dicts
    state_fn: function mapping event -> state string
    max_states: length of the returned vector

    Bins follow the order in which distinct (state, next_state) pairs first
    occur in the trajectory; pairs beyond ``max_states`` are dropped, unused
    bins are zero. Every bin is divided by the total number of transitions
    (including dropped ones). Fewer than two events yields all zeros.
    """
    if len(events) < 2:
        return np.zeros(max_states, dtype=np.float32)

    visited = list(map(state_fn, events))
    pair_counts = {}
    for pair in zip(visited[:-1], visited[1:]):
        pair_counts[pair] = pair_counts.get(pair, 0) + 1

    n_transitions = sum(pair_counts.values())
    kept = list(pair_counts.values())[:max_states]
    hist = np.zeros(max_states, dtype=np.float32)
    hist[:len(kept)] = kept
    return hist / (n_transitions + 1e-10)
def temporal_signature(events: List, ts_fn: Callable) -> np.ndarray:
    """summary statistics of the gaps between consecutive (sorted) timestamps

    events: list of event objects/dicts
    ts_fn: function mapping event -> timestamp (float seconds)
    returns: float32 array [mean_dt, std_dt, skew, n_intervals]; all zeros when
    there are fewer than two events
    """
    if len(events) < 2:
        return np.zeros(4, dtype=np.float32)

    ordered = sorted(ts_fn(e) for e in events)
    gaps = np.diff(ordered).astype(np.float32)
    if gaps.size == 0:
        return np.zeros(4, dtype=np.float32)

    mean_gap = np.mean(gaps)
    std_gap = np.std(gaps) + 1e-10  # offset keeps the standardization below finite
    if std_gap > 1e-8:
        skewness = np.mean(((gaps - mean_gap) / std_gap) ** 3)
    else:
        # (near-)constant gaps: skewness is reported as zero
        skewness = 0.0
    return np.array([mean_gap, std_gap, skewness, len(gaps)], dtype=np.float32)
def state_coverage(events: List, state_fn: Callable, mdp_states: Set[str]) -> float:
    """share of the known MDP states that the trajectory touches

    events: list of event objects/dicts
    state_fn: function mapping event -> state string
    mdp_states: set of all possible MDP states
    returns: value in [0, 1]; 0.0 when ``mdp_states`` is empty
    """
    if not mdp_states:
        return 0.0
    visited = {state_fn(e) for e in events}
    known_and_visited = visited & mdp_states
    return len(known_and_visited) / len(mdp_states)
def transition_entropy(events: List, state_fn: Callable) -> float:
    """Shannon entropy (natural log) of the observed transition distribution

    Each distinct (state, next_state) pair is one outcome, weighted by how
    often it occurs. Higher entropy = more varied navigation; fewer than two
    events gives 0.0.
    """
    if len(events) < 2:
        return 0.0

    path = [state_fn(e) for e in events]
    tally = {}
    for pair in zip(path, path[1:]):
        tally[pair] = tally.get(pair, 0) + 1

    n_transitions = sum(tally.values())
    weighted_logs = 0
    for count in tally.values():
        p = count / n_transitions
        weighted_logs += p * np.log(p + 1e-10)  # offset guards log(0)
    return -weighted_logs
def event_type_distribution(events: List, event_name_fn: Callable) -> np.ndarray:
    """fraction of events whose (lower-cased) name matches each keyword group

    returns: [page_view_ratio, hover_ratio, cart_ratio, purchase_ratio]

    Matching is by substring, so one event can count towards several groups
    and the ratios need not sum to one. An empty trajectory gives all zeros.
    """
    if not events:
        return np.zeros(4, dtype=np.float32)

    n_events = len(events)
    lowered = [event_name_fn(e).lower() for e in events]
    keyword_groups = (
        ('page', 'view'),
        ('hover',),
        ('cart',),
        ('purchase', 'checkout'),
    )
    ratios = [
        sum(1 for name in lowered if any(word in name for word in group)) / n_events
        for group in keyword_groups
    ]
    return np.array(ratios, dtype=np.float32)
def featurize_trajectory(events: List, state_fn: Callable, ts_fn: Callable,
                         event_name_fn: Callable, mdp_states: Optional[Set[str]] = None,
                         output_dim: int = 64) -> np.ndarray:
    """convert trajectory to fixed-dimension float32 feature vector

    events: list of event objects/dicts
    state_fn: function mapping event -> state string
    ts_fn: function mapping event -> timestamp (float)
    event_name_fn: function mapping event -> event name string
    mdp_states: optional set of all MDP states for coverage calculation
    output_dim: desired output dimension (will pad/truncate)

    Layout before padding/truncation (52 values): 40 transition-histogram
    bins, 4 temporal statistics, state coverage, transition entropy,
    trajectory length, number of unique states, 4 event-type ratios.
    """
    groups = [
        transition_histogram(events, state_fn, max_states=40),
        temporal_signature(events, ts_fn),
        [
            state_coverage(events, state_fn, mdp_states or set()),
            transition_entropy(events, state_fn),
            float(len(events)),
            float(len({state_fn(e) for e in events})),
        ],
        event_type_distribution(events, event_name_fn),
    ]
    flat = [value for group in groups for value in group]

    # zero-filled target handles the padding; the slice handles truncation
    vector = np.zeros(output_dim, dtype=np.float32)
    kept = flat[:output_dim]
    vector[:len(kept)] = kept
    return vector
def parse_timestamp(ts: Any) -> float:
    """parse various timestamp formats to float seconds

    Accepted inputs:
      - None -> 0.0
      - datetime objects -> their POSIX timestamp
      - ISO-8601 strings (a trailing ``Z`` is read as UTC)
      - anything convertible with float(): int, float, numpy scalars, Decimal, ...

    Unparseable values map to 0.0 rather than raising, so a single bad event
    does not abort featurization.
    """
    if ts is None:
        return 0.0
    if isinstance(ts, datetime):
        return ts.timestamp()
    if isinstance(ts, str):
        try:
            # fromisoformat() only understands "Z" on newer Pythons, so normalize it
            return datetime.fromisoformat(ts.replace('Z', '+00:00')).timestamp()
        except ValueError:
            return 0.0
    try:
        # covers numeric types that are not int/float subclasses (e.g. numpy.int64)
        return float(ts)
    except (TypeError, ValueError):
        return 0.0

54
lib/kafka_client.py Executable file
View File

@@ -0,0 +1,54 @@
from kafka import KafkaConsumer
import json
import os
from dotenv import load_dotenv
load_dotenv()
def get_interactions(
    topic='user-interactions',
    bootstrap_servers=None,
    from_beginning=True,
    max_records=None,
    timeout_ms=5000
):
    """Read interaction events from a Kafka topic and return them as a list.

    Args:
        topic: Kafka topic name
        bootstrap_servers: broker address; when empty it is built from the
            KAFKA_HOST / KAFKA_PORT environment variables (localhost:9092)
        from_beginning: start at the earliest offset if True, else the latest
        max_records: stop after this many records (None or 0 = read until the
            consumer times out)
        timeout_ms: how long the consumer waits for a new message before
            iteration ends

    Returns:
        List of JSON-decoded message values

    Offsets are never committed, and the consumer is always closed, even when
    reading fails part-way.
    """
    if not bootstrap_servers:
        bootstrap_servers = '{}:{}'.format(
            os.getenv('KAFKA_HOST', 'localhost'),
            os.getenv('KAFKA_PORT', '9092'),
        )

    def _decode(raw):
        return json.loads(raw.decode('utf-8'))

    offset_policy = 'earliest' if from_beginning else 'latest'
    consumer = KafkaConsumer(
        topic,
        bootstrap_servers=bootstrap_servers,
        auto_offset_reset=offset_policy,
        enable_auto_commit=False,
        value_deserializer=_decode,
        consumer_timeout_ms=timeout_ms
    )

    collected = []
    try:
        for record in consumer:
            collected.append(record.value)
            if max_records and len(collected) >= max_records:
                break
    finally:
        consumer.close()
    return collected
if __name__ == '__main__':
    # manual smoke test: print up to ten events from the default topic
    for event in get_interactions(max_records=10):
        print(event)

View File

@@ -178,3 +178,49 @@ class ModelRegistry:
return True return True
except: except:
return False return False
def set_session_prices(self, session_id: str, prices: Dict[str, float], ttl: int = 1800):
    """
    Store prices for a specific session (write path for session-aware pricing).

    Prices go into the Redis hash ``session:<session_id>:prices`` (one field
    per product, values stored as strings) and the key's expiry is reset to
    ``ttl`` on every call. An empty ``prices`` dict is a no-op.

    Args:
        session_id: session identifier
        prices: dict of {productId: price}
        ttl: time-to-live in seconds (default 30min)
    """
    if not prices:
        return

    hash_key = f"session:{session_id}:prices"
    serialized = {}
    for product_id, price in prices.items():
        serialized[product_id] = str(price)

    # a hash allows O(1) lookup of a single product's price
    self.redis_client.hset(hash_key, mapping=serialized)
    self.redis_client.expire(hash_key, ttl)
def get_session_price(self, session_id: str, product_id: str) -> Optional[float]:
    """
    Look up the stored price for (sessionId, productId) - the fast read path
    used by the provider.

    Reads field ``product_id`` of the hash ``session:<session_id>:prices``;
    both bytes and str replies from the client are handled.

    Returns: price as float, or None if no price is stored
    """
    raw = self.redis_client.hget(f"session:{session_id}:prices", product_id)
    if raw is None:
        return None
    if isinstance(raw, bytes):
        raw = raw.decode('utf-8')
    return float(raw)
def get_session_all_prices(self, session_id: str) -> Dict[str, float]:
    """Return every stored {productId: price} for a session ({} when none exist).

    Keys and values coming back from the client as bytes are decoded as UTF-8.
    """
    raw_prices = self.redis_client.hgetall(f"session:{session_id}:prices")
    if not raw_prices:
        return {}

    def _text(value):
        return value.decode('utf-8') if isinstance(value, bytes) else value

    prices = {}
    for product_id, price in raw_prices.items():
        prices[_text(product_id)] = float(_text(price))
    return prices

72
lib/state.py Normal file
View File

@@ -0,0 +1,72 @@
"""Unified state representation utilities for MDP state encoding
Used by both experiments/ and sim/ components for consistent state handling
"""
from typing import Any, Callable
def make_state_repr(page: str = None, product_id: str = None, event_name: str = None) -> str:
    """build the canonical state string from its three components

    format: page|productId|eventName
    Missing or empty components are replaced by the placeholders
    'unk', 'none' and 'unknown' respectively.
    """
    return "{}|{}|{}".format(
        page or 'unk',
        product_id or 'none',
        event_name or 'unknown',
    )
def event_to_state(evt: Any) -> str:
    """convert an event (dict or attribute-style object) to its state string

    Reads ``page``, ``productId`` and the event name (``eventName``, falling
    back to ``event_type``); absent fields become the placeholders applied by
    make_state_repr.
    """
    if isinstance(evt, dict):
        read = evt.get
    else:
        def read(field):
            return getattr(evt, field, None)

    return make_state_repr(
        page=read('page'),
        product_id=read('productId'),
        event_name=read('eventName') or read('event_type'),
    )
def parse_state(state_str: str) -> dict:
    """split a state string back into its components

    returns: {'page': ..., 'productId': ..., 'eventName': ...}
    A component is None when it is missing from the string or equals its
    placeholder ('unk', 'none', 'unknown').
    """
    pieces = state_str.split('|')
    layout = (('page', 'unk'), ('productId', 'none'), ('eventName', 'unknown'))

    parsed = {}
    for position, (field, placeholder) in enumerate(layout):
        value = pieces[position] if position < len(pieces) else None
        parsed[field] = None if value == placeholder else value
    return parsed
def get_event_name(evt: Any) -> str:
    """return the event's name: ``eventName``, else ``event_type``, else ''

    Works for dicts (key lookup) and for objects (attribute lookup).
    """
    if isinstance(evt, dict):
        read = evt.get
    else:
        def read(field):
            return getattr(evt, field, None)

    return read('eventName') or read('event_type') or ''
def get_timestamp(evt: Any) -> Any:
    """extract the raw timestamp from an event object/dict

    Prefers ``ts`` and falls back to ``timestamp`` only when ``ts`` is absent,
    None or an empty string. A ``ts`` of 0 / 0.0 is a valid timestamp and is
    returned as-is. Returns None when neither field carries a value; the
    result is not converted (see features.parse_timestamp).
    """
    if isinstance(evt, dict):
        read = evt.get
    else:
        def read(field):
            return getattr(evt, field, None)

    ts = read('ts')
    # explicit checks instead of `or`, which would discard a legitimate 0
    if ts is not None and ts != '':
        return ts
    return read('timestamp')
def create_state_fn() -> Callable:
    """factory: hand back the event -> state-string function (event_to_state)"""
    return event_to_state
def create_event_name_fn() -> Callable:
    """factory: hand back the event -> event-name function (get_event_name)"""
    return get_event_name
def create_timestamp_fn() -> Callable:
    """factory: hand back the event -> raw-timestamp function (get_timestamp)

    The returned function does not convert the value; use
    features.parse_timestamp to turn it into float seconds.
    """
    return get_timestamp

2
sim/case/__init__.py Normal file
View File

@@ -0,0 +1,2 @@
"""Case-specific simulations and experiments."""

View File

@@ -0,0 +1,2 @@
"""Minimal thesis-aligned pricing simulation (self-contained)."""

View File

@@ -0,0 +1,125 @@
"""Cost of Information (COI) computation for thesis pricing system.
Core KPI: COI = E[p_shown] - p_min measures pricing power from information asymmetry.
Theorem 1 shows COI erodes as agent queries increase: as N->inf, p^(1)->p_min.
"""
from __future__ import annotations
from dataclasses import dataclass
from typing import Dict, List, TYPE_CHECKING
import numpy as np
if TYPE_CHECKING:
from .simplified import Session
@dataclass(frozen=True)
class COIWindow:
    """Immutable bundle of COI metrics for one window of sessions.

    Scalar fields:
        policy: E[p_shown] - cost, the definition-level KPI
        agent: E[p^(1)] - cost, where p^(1) is the min price under agent querying
        leak: max(policy - agent, 0), the observable gap from reconnaissance
        survival_ratio: agent / policy, fraction of pricing power retained

    Array fields are indexed by product.
    """
    policy: float
    agent: float
    leak: float
    survival_ratio: float
    # per-product breakdown of `policy`
    policy_by_product: np.ndarray
    # per-product breakdown of `agent`
    agent_by_product: np.ndarray
    # weights used to aggregate the per-product values
    demand_weights: np.ndarray
def aggregate_prices(sessions: List["Session"], mode: str = "all") -> Dict[int, List[float] | float]:
"""Unified price aggregation across sessions.
mode: "all" returns all prices per product, "min_per_session" returns min price per session per product,
"min_across" returns single min price per product
"""
if mode == "min_across":
mins: Dict[int, float] = {}
for s in sessions:
for e in s.events:
pidx, price = int(e.product_idx), float(e.price_seen)
mins[pidx] = min(mins.get(pidx, price), price)
return mins
elif mode == "min_per_session":
result: Dict[int, List[float]] = {}
for s in sessions:
by_p: Dict[int, float] = {}
for e in s.events:
pidx, price = int(e.product_idx), float(e.price_seen)
by_p[pidx] = min(by_p.get(pidx, price), price)
for pidx, pmin in by_p.items():
result.setdefault(pidx, []).append(pmin)
return result
else: # "all"
prices: Dict[int, List[float]] = {}
for s in sessions:
for e in s.events:
prices.setdefault(e.product_idx, []).append(float(e.price_seen))
return prices
def demand_weights_by_product(sessions: List["Session"], demand_mapping: Dict[str, float], n_products: int) -> np.ndarray:
    """Compute demand-weighted importance per product.

    Each session's demand (``demand_mapping[sid]``) is credited to the product
    of that session's first event. Session ids without a matching session,
    sessions without events, and product indices outside ``[0, n_products)``
    are ignored. The result is normalized to sum to 1; when no demand could be
    assigned it is all zeros.
    """
    w = np.zeros(n_products, dtype=float)
    sessions_by_id = {s.sid: s for s in sessions}
    for sid, q in demand_mapping.items():
        sess = sessions_by_id.get(sid)
        if not (sess and sess.events):
            continue
        pidx = int(sess.events[0].product_idx)
        # bounds check: a negative index would otherwise wrap around silently
        # and a too-large one would raise
        if 0 <= pidx < n_products:
            w[pidx] += float(q)
    total = float(np.sum(w))
    return (w / total) if total > 0 else w
def compute_coi_window(sessions: List["Session"], costs: np.ndarray, demand_mapping: Dict[str, float] | None = None) -> COIWindow:
    """Compute COI metrics over a window of sessions.

    Per product, policy COI is the mean shown price minus cost, and agent COI
    is the lowest price any agent session (``actor == "A"``) saw minus cost.
    Products seen only by non-agents keep their policy COI as agent COI.
    The per-product values are aggregated with demand weights when
    ``demand_mapping`` yields any, otherwise as a plain mean over the products
    that were seen (0.0 when nothing was seen).
    """
    n = int(len(costs))
    shown_prices = aggregate_prices(sessions, mode="all")
    agent_sessions = [s for s in sessions if s.actor == "A"]
    if agent_sessions:
        agent_floor = aggregate_prices(agent_sessions, mode="min_across")
    else:
        agent_floor = {}

    seen = np.fromiter((i in shown_prices for i in range(n)), dtype=bool, count=n)
    agent_seen = np.fromiter((i in agent_floor for i in range(n)), dtype=bool, count=n)

    policy_by = np.zeros(n, dtype=float)
    for pidx, shown in shown_prices.items():
        if 0 <= pidx < n and shown:
            policy_by[pidx] = float(np.mean(shown) - float(costs[pidx]))

    agent_by = np.zeros(n, dtype=float)
    for pidx, floor_price in agent_floor.items():
        if 0 <= pidx < n:
            agent_by[pidx] = float(floor_price - float(costs[pidx]))

    # no erosion if no agent exposure
    untouched_by_agents = seen & ~agent_seen
    agent_by[untouched_by_agents] = policy_by[untouched_by_agents]

    if demand_mapping:
        demand_w = demand_weights_by_product(sessions, demand_mapping, n)
    else:
        demand_w = np.zeros(n, dtype=float)

    if float(np.sum(demand_w)) > 0:
        policy = float(np.dot(demand_w, policy_by))
        agent = float(np.dot(demand_w, agent_by))
    elif np.any(seen):
        policy = float(np.mean(policy_by[seen]))
        agent = float(np.mean(agent_by[seen]))
    else:
        policy, agent = 0.0, 0.0

    leak = float(max(policy - agent, 0.0))
    if policy > 0:
        survival = float(np.clip(agent / policy, 0.0, 1.0))
    else:
        survival = 0.0

    return COIWindow(
        policy=policy,
        agent=agent,
        leak=leak,
        survival_ratio=survival,
        policy_by_product=policy_by,
        agent_by_product=agent_by,
        demand_weights=demand_w,
    )
def coi_erosion(coi_policy: float, coi_agent: float, eps: float = 1e-9) -> float:
    """Thesis-consistent COI erosion: fraction of pricing power destroyed by agent queries.

    erosion = 1 - (COI_agent / COI_policy), clipped to [0, 1]
    When agents find low prices, COI_agent -> 0, erosion -> 1.

    ``eps`` is the threshold below which ``coi_policy`` counts as "no pricing
    power"; in that case erosion is defined as 0.0. Because that guard already
    rules out a zero denominator, the ratio is computed exactly, so equal
    policy and agent COI give exactly 0.
    """
    if coi_policy <= eps:
        return 0.0
    return float(np.clip(1.0 - (coi_agent / coi_policy), 0.0, 1.0))

View File

@@ -0,0 +1,325 @@
"""COI leakage experiments and policy comparisons.
Demonstrates the core thesis contribution: COI erosion under agent contamination
and recovery via robust pricing policies.
Generates TensorBoard logs for:
- COI erosion curves across contamination levels
- Policy comparison (fixed vs adaptive vs RL)
- Revenue/margin trade-offs
"""
from __future__ import annotations
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, List, Tuple
import json
import numpy as np
try:
from torch.utils.tensorboard import SummaryWriter
HAS_TB = True
except ImportError:
HAS_TB = False
from .simplified_env import PricingEnv, EnvConfig, make_env
from .simplified import System
@dataclass
class ExperimentResult:
    """Aggregated metrics for one (policy, contamination level) pair."""
    name: str            # policy label
    alpha: float         # contamination level the environment was configured with
    reward_mean: float   # mean of the collected per-step rewards
    reward_std: float    # standard deviation of the collected per-step rewards
    coi_erosion: float   # mean of the reported coi_erosion values
    alpha_error: float   # mean |alpha_true - alpha_est|
    revenue: float       # mean of the reported revenue values
    margin: float        # mean policy multiplier minus one

    def to_dict(self) -> dict:
        """Return the fields as a plain ``{field_name: value}`` dict."""
        snapshot = {}
        for field_name in self.__dataclass_fields__:
            snapshot[field_name] = getattr(self, field_name)
        return snapshot
def theoretical_coi_erosion_curve(alphas: np.ndarray, n_sessions: int = 1000) -> np.ndarray:
    """Closed-form COI erosion per contamination level (order-statistic model).

    With N i.i.d. uniform queries on [p_min, p_max] the expected minimum is
    E[p^(1)] = p_min + (p_max - p_min)/(N+1), which the model turns into
    erosion = 1 - 2/(N+1).

    Args:
        alphas: Contamination levels to evaluate.
        n_sessions: Sessions per window; N = max(1, int(alpha * n_sessions)).

    Returns:
        Array with one erosion value per entry of ``alphas``.
    """
    def _erosion_at(level) -> float:
        # at least one agent query, so alpha = 0 maps to zero erosion
        agent_queries = max(1, int(level * n_sessions))
        return 1.0 - 2.0 / (agent_queries + 1)

    return np.array([_erosion_at(level) for level in alphas])
def run_policy_episode(
    env: PricingEnv,
    policy_fn,
    n_episodes: int = 10
) -> Tuple[List[float], List[float], List[float], List[float]]:
    """Roll out ``policy_fn`` for ``n_episodes`` episodes, pooling per-step metrics.

    Args:
        env: Environment exposing ``reset()``, ``step(action)`` and ``n``.
        policy_fn: Callable ``(obs, n) -> action``.
        n_episodes: Number of episodes to run.

    Returns:
        ``(rewards, coi_erosions, alpha_errors, revenues)``. Rewards are
        recorded on every step; the other lists only receive a value when
        the step's ``info`` dict carries the corresponding keys.
    """
    rewards: List[float] = []
    coi_erosions: List[float] = []
    alpha_errors: List[float] = []
    revenues: List[float] = []

    for _episode in range(n_episodes):
        obs, info = env.reset()
        while True:
            obs, reward, terminated, truncated, info = env.step(policy_fn(obs, env.n))
            rewards.append(reward)
            if 'coi_erosion' in info:
                coi_erosions.append(info['coi_erosion'])
            if 'alpha_true' in info and 'alpha_est' in info:
                alpha_errors.append(abs(info['alpha_true'] - info['alpha_est']))
            if 'revenue' in info:
                revenues.append(info['revenue'])
            if terminated or truncated:
                break

    return rewards, coi_erosions, alpha_errors, revenues
class PolicyRegistry:
    """Baseline pricing policies.

    Every policy has the shape ``(obs, n, ...) -> multipliers`` and returns
    one multiplier per product.
    """

    @staticmethod
    def _alpha_hat(obs: np.ndarray, n: int):
        """Read the alpha estimate stored at index ``2 * n``; 0.2 if obs is too short."""
        slot = 2 * n
        if len(obs) > slot:
            return obs[slot]
        return 0.2

    @staticmethod
    def fixed(obs: np.ndarray, n: int, margin: float = 0.15) -> np.ndarray:
        """Same multiplier ``1 + margin`` for every product; ``obs`` is ignored."""
        multiplier = 1.0 + margin
        return np.ones(n, dtype=np.float32) * multiplier

    @staticmethod
    def random(obs: np.ndarray, n: int, rng: np.random.Generator = None) -> np.ndarray:
        """Multipliers drawn uniformly from [0.7, 1.3); a fresh generator is made if none is given."""
        if not rng:
            rng = np.random.default_rng()
        draws = rng.uniform(0.7, 1.3, n)
        return draws.astype(np.float32)

    @staticmethod
    def adaptive(obs: np.ndarray, n: int, base_margin: float = 0.15) -> np.ndarray:
        """Shrink the base margin linearly as the alpha estimate grows."""
        alpha_est = PolicyRegistry._alpha_hat(obs, n)
        margin_scale = 1.0 - 0.4 * alpha_est
        return np.ones(n, dtype=np.float32) * (1.0 + base_margin * margin_scale)

    @staticmethod
    def aggressive(obs: np.ndarray, n: int) -> np.ndarray:
        """Constant high multiplier (1.4); contamination is ignored."""
        high = 1.4
        return np.ones(n, dtype=np.float32) * high

    @staticmethod
    def defensive(obs: np.ndarray, n: int) -> np.ndarray:
        """Constant low multiplier (1.05); always cautious."""
        low = 1.05
        return np.ones(n, dtype=np.float32) * low

    @staticmethod
    def alpha_proportional(obs: np.ndarray, n: int, max_margin: float = 0.3) -> np.ndarray:
        """Margin scaled by ``1 - alpha_est``: full margin at alpha 0, none at alpha 1."""
        alpha_est = PolicyRegistry._alpha_hat(obs, n)
        margin = max_margin * (1.0 - alpha_est)
        return np.ones(n, dtype=np.float32) * (1.0 + margin)
def run_contamination_sweep(
    alphas: List[float],
    policies: Dict[str, callable],
    n_products: int = 10,
    max_steps: int = 200,
    n_episodes: int = 10,
    seed: int = 42,
    log_dir: str = None
) -> Dict[str, List[ExperimentResult]]:
    """Run every policy at every contamination level and collect summary metrics.

    For each alpha a fresh "robust"-reward environment is built and shared by
    all policies at that level. When ``log_dir`` is given and TensorBoard is
    available, per-policy scalars and the theoretical erosion curve are
    written under ``<log_dir>/sweep``.

    Args:
        alphas: Contamination levels to evaluate.
        policies: Mapping ``name -> policy_fn(obs, n)``.
        n_products: Number of products in the environment.
        max_steps: Episode length.
        n_episodes: Episodes per (alpha, policy) pair.
        seed: Environment seed.
        log_dir: Optional TensorBoard root directory.

    Returns:
        ``{policy_name: [ExperimentResult per alpha, in input order]}``.
    """
    results = {name: [] for name in policies}
    writer = SummaryWriter(Path(log_dir) / "sweep") if log_dir and HAS_TB else None
    try:
        for alpha in alphas:
            print(f" alpha={alpha:.2f}", end=" ")
            env_cfg = EnvConfig(
                n_products=n_products, max_steps=max_steps,
                alpha_true=alpha, reward_mode="robust", seed=seed)
            env = make_env(env_cfg)
            # TensorBoard step = alpha in percent. Round instead of truncating:
            # float products such as 0.29 * 100 evaluate to 28.999999999999996.
            step = int(round(float(alpha) * 100))
            for name, policy_fn in policies.items():
                rewards, coi_vals, alpha_errs, revenues = run_policy_episode(env, policy_fn, n_episodes)
                result = ExperimentResult(
                    name=name, alpha=alpha,
                    reward_mean=float(np.mean(rewards)),
                    reward_std=float(np.std(rewards)),
                    coi_erosion=float(np.mean(coi_vals)) if coi_vals else 0.0,
                    alpha_error=float(np.mean(alpha_errs)) if alpha_errs else 0.0,
                    revenue=float(np.mean(revenues)) if revenues else 0.0,
                    # margin is probed by evaluating the policy on an all-zero observation
                    margin=float(np.mean([policy_fn(np.zeros(3 * n_products + 3), n_products)]) - 1.0))
                results[name].append(result)
                if writer:
                    writer.add_scalar(f'{name}/reward', result.reward_mean, step)
                    writer.add_scalar(f'{name}/coi_erosion', result.coi_erosion, step)
                    writer.add_scalar(f'{name}/alpha_error', result.alpha_error, step)
                    writer.add_scalar(f'{name}/revenue', result.revenue, step)
            print("done")
        # add theoretical curve
        if writer:
            theo = theoretical_coi_erosion_curve(np.array(alphas))
            for a, e in zip(alphas, theo):
                writer.add_scalar('theoretical/coi_erosion', e, int(round(float(a) * 100)))
    finally:
        # close (and flush) the writer even if a rollout raises
        if writer:
            writer.close()
    return results
def run_coi_demonstration(log_dir: str = "sim/case/thesis_simplified/runs", seed: int = 42) -> Dict:
    """Main COI demonstration experiment.

    Prints the theoretical erosion curve, runs the baseline policies over a
    contamination sweep, prints a per-policy summary, and writes everything
    to ``<log_dir>/coi_demo_results.json``. Theoretical values are also
    logged to TensorBoard under ``<log_dir>/coi_demo`` when it is available.

    Args:
        log_dir: Output directory (created if missing).
        seed: Seed forwarded to the sweep environments.

    Returns:
        ``{'theoretical': {'alphas', 'erosion'}, 'empirical': {policy: [result dicts]}}``.
    """
    print("=== COI Leakage Demonstration ===\n")
    Path(log_dir).mkdir(parents=True, exist_ok=True)
    writer = SummaryWriter(Path(log_dir) / "coi_demo") if HAS_TB else None
    try:
        # theoretical erosion curve
        print("1. Theoretical COI erosion (Theorem 1)")
        alphas = np.linspace(0.0, 0.6, 13)
        theo_erosion = theoretical_coi_erosion_curve(alphas, n_sessions=1000)
        for a, e in zip(alphas, theo_erosion):
            print(f" alpha={a:.2f} -> erosion={e:.3f}")
            if writer:
                # step = alpha in percent; rounded because linspace values carry
                # float error and int() would truncate e.g. 28.999999999999996 to 28
                writer.add_scalar('theory/coi_erosion', e, int(round(float(a) * 100)))
        # policy comparison
        print("\n2. Policy comparison across contamination levels")
        policies = {
            'fixed': lambda obs, n: PolicyRegistry.fixed(obs, n),
            'aggressive': PolicyRegistry.aggressive,
            'defensive': PolicyRegistry.defensive,
            'adaptive': PolicyRegistry.adaptive,
            'alpha_proportional': PolicyRegistry.alpha_proportional,
        }
        sweep_alphas = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5]
        results = run_contamination_sweep(
            sweep_alphas, policies, n_products=10, max_steps=100,
            n_episodes=5, seed=seed, log_dir=log_dir)
        # summarize
        print("\n3. Summary by policy")
        for name, res_list in results.items():
            avg_reward = np.mean([r.reward_mean for r in res_list])
            avg_coi = np.mean([r.coi_erosion for r in res_list])
            print(f" {name:20s}: avg_reward={avg_reward:.2f}, avg_coi={avg_coi:.3f}")
        # save results
        output = {
            'theoretical': {'alphas': alphas.tolist(), 'erosion': theo_erosion.tolist()},
            'empirical': {name: [r.to_dict() for r in res_list] for name, res_list in results.items()}}
        with open(Path(log_dir) / "coi_demo_results.json", 'w') as f:
            json.dump(output, f, indent=2)
    finally:
        # close (and flush) the writer even if the sweep or the dump fails
        if writer:
            writer.close()
    print(f"\nResults saved to {log_dir}/coi_demo_results.json")
    print(f"TensorBoard: tensorboard --logdir {log_dir}")
    return output
def run_reward_mode_comparison(log_dir: str = "sim/case/thesis_simplified/runs", seed: int = 42) -> Dict:
    """Compare different reward modes.

    Runs the adaptive baseline at alpha = 0.3 once per reward mode
    ("revenue", "profit", "robust", "coi_aware") and stores the summary in
    ``<log_dir>/reward_mode_results.json``. Scalars are also written to
    TensorBoard under ``<log_dir>/reward_modes`` when it is available.

    Args:
        log_dir: Output directory (created if missing).
        seed: Environment seed.

    Returns:
        ``{mode: {'reward_mean', 'reward_std', 'coi_erosion', 'revenue'}}``.
    """
    print("=== Reward Mode Comparison ===\n")
    Path(log_dir).mkdir(parents=True, exist_ok=True)
    writer = SummaryWriter(Path(log_dir) / "reward_modes") if HAS_TB else None
    reward_modes = ["revenue", "profit", "robust", "coi_aware"]
    alpha = 0.3  # moderate contamination
    results = {}
    try:
        for mode in reward_modes:
            print(f" mode={mode}", end=" ")
            env_cfg = EnvConfig(
                n_products=10, max_steps=200, alpha_true=alpha,
                reward_mode=mode, seed=seed)
            env = make_env(env_cfg)
            rewards, coi_vals, _, revenues = run_policy_episode(
                env, PolicyRegistry.adaptive, n_episodes=10)
            results[mode] = {
                'reward_mean': float(np.mean(rewards)),
                'reward_std': float(np.std(rewards)),
                'coi_erosion': float(np.mean(coi_vals)) if coi_vals else 0.0,
                'revenue': float(np.mean(revenues)) if revenues else 0.0}
            if writer:
                for k, v in results[mode].items():
                    writer.add_scalar(f'{mode}/{k}', v, 0)
            print(f"reward={results[mode]['reward_mean']:.2f}, coi={results[mode]['coi_erosion']:.3f}")
    finally:
        # close (and flush) the writer even if a rollout raises
        if writer:
            writer.close()
    with open(Path(log_dir) / "reward_mode_results.json", 'w') as f:
        json.dump(results, f, indent=2)
    return results
def run_alpha_drift_experiment(log_dir: str = "sim/case/thesis_simplified/runs", seed: int = 42) -> Dict:
    """Test policy robustness under non-stationary contamination.

    Runs the adaptive baseline once per ``alpha_drift`` setting, starting from
    alpha = 0.2 with the "robust" reward. Scalars go to TensorBoard under
    ``<log_dir>/alpha_drift`` when it is available; no JSON file is written.

    NOTE(review): this function only passes ``alpha_drift`` into ``EnvConfig``;
    whether the environment applies it is decided there - confirm that the
    drift settings actually produce different dynamics.

    Args:
        log_dir: Output directory (created if missing).
        seed: Environment seed.

    Returns:
        ``{'drift_<rate>': {'reward_mean', 'coi_erosion', 'alpha_tracking_error'}}``.
    """
    print("=== Alpha Drift Experiment ===\n")
    Path(log_dir).mkdir(parents=True, exist_ok=True)
    writer = SummaryWriter(Path(log_dir) / "alpha_drift") if HAS_TB else None
    drift_rates = [0.0, 0.01, 0.02, 0.05]
    results = {}
    try:
        for drift in drift_rates:
            print(f" drift={drift:.2f}", end=" ")
            env_cfg = EnvConfig(
                n_products=10, max_steps=200, alpha_true=0.2,
                alpha_drift=drift, reward_mode="robust", seed=seed)
            env = make_env(env_cfg)
            rewards, coi_vals, alpha_errs, _ = run_policy_episode(
                env, PolicyRegistry.adaptive, n_episodes=10)
            results[f'drift_{drift}'] = {
                'reward_mean': float(np.mean(rewards)),
                'coi_erosion': float(np.mean(coi_vals)) if coi_vals else 0.0,
                'alpha_tracking_error': float(np.mean(alpha_errs)) if alpha_errs else 0.0}
            if writer:
                for k, v in results[f'drift_{drift}'].items():
                    writer.add_scalar(f'drift_{drift}/{k}', v, 0)
            print(f"reward={results[f'drift_{drift}']['reward_mean']:.2f}, "
                  f"alpha_err={results[f'drift_{drift}']['alpha_tracking_error']:.3f}")
    finally:
        # close (and flush) the writer even if a rollout raises
        if writer:
            writer.close()
    return results
if __name__ == "__main__":
    # Command-line entry point: choose one experiment, or run all of them in order.
    import argparse

    parser = argparse.ArgumentParser(description="Run COI experiments")
    parser.add_argument("--exp", type=str, default="coi", choices=["coi", "reward", "drift", "all"])
    parser.add_argument("--log-dir", type=str, default="sim/case/thesis_simplified/runs")
    parser.add_argument("--seed", type=int, default=42)
    args = parser.parse_args()

    run_everything = args.exp == "all"
    if run_everything or args.exp == "coi":
        run_coi_demonstration(args.log_dir, args.seed)
    if run_everything or args.exp == "reward":
        run_reward_mode_comparison(args.log_dir, args.seed)
    if run_everything or args.exp == "drift":
        run_alpha_drift_experiment(args.log_dir, args.seed)

View File

@@ -0,0 +1,72 @@
"""Behavioral separability for human/agent detection.
Computes divergence signals delta_H, delta_A from session trajectories using
transition kernel estimation and KL divergence to prototype behavioral profiles.
"""
from __future__ import annotations
from typing import Dict, List, Tuple, TYPE_CHECKING
import numpy as np
if TYPE_CHECKING:
from .simplified import Event, Session
# Prototype first-order transition kernels for human (TRANS_H) and agent (TRANS_A) sessions.
# Layout: state -> {next_state: probability}; every row below sums to 1.0.
# "start" is the initial state used by build_kernel and "end" only ever appears as a target.
# The same tables serve as the reference profiles in compute_divergence.
# Human prototype: from "cart" the most likely move is "purchase" (0.6).
TRANS_H = {
"start": {"view": 0.85, "end": 0.15},
"view": {"detail": 0.4, "cart": 0.3, "view": 0.2, "end": 0.1},
"detail": {"cart": 0.5, "view": 0.3, "end": 0.2},
"cart": {"purchase": 0.6, "view": 0.25, "end": 0.15},
"purchase": {"end": 1.0},
}
# Agent prototype: mass is concentrated on view/detail loops (including a detail->detail
# self-transition) and "purchase" from "cart" is rare (0.2).
TRANS_A = {
"start": {"view": 0.95, "end": 0.05},
"view": {"detail": 0.6, "view": 0.25, "cart": 0.1, "end": 0.05},
"detail": {"view": 0.5, "cart": 0.15, "detail": 0.3, "end": 0.05},
"cart": {"view": 0.4, "purchase": 0.2, "end": 0.4},
"purchase": {"end": 1.0},
}
def kl_div(p: Dict[str, float], q: Dict[str, float], eps: float = 1e-10) -> float:
    """Smoothed KL divergence D_KL(p || q) between two discrete distributions.

    The sum runs over the union of both key sets. A key missing from either
    side is given mass ``eps``, and ``eps`` is also added to numerator and
    denominator inside the logarithm so no term is undefined.
    """
    support = set(p.keys()) | set(q.keys())
    divergence = 0
    for outcome in support:
        p_mass = p.get(outcome, eps)
        q_mass = q.get(outcome, eps)
        divergence += p_mass * np.log((p_mass + eps) / (q_mass + eps))
    return divergence
def build_kernel(events: List["Event"]) -> Dict[str, Dict[str, float]]:
    """Estimate the empirical transition kernel T' of one trajectory.

    The walk starts in the synthetic state "start"; each event's ``action``
    is the next state. Transition counts are normalised per source state.

    Returns:
        ``{state: {next_state: relative frequency}}``; empty for no events.
    """
    counts: Dict[str, Dict[str, int]] = {}
    source = "start"
    for event in events:
        target = event.action
        row = counts.setdefault(source, {})
        row[target] = row.get(target, 0) + 1
        source = target

    kernel = {}
    for state, row in counts.items():
        outgoing = sum(row.values())
        if outgoing > 0:
            kernel[state] = {dst: hits / outgoing for dst, hits in row.items()}
    return kernel
def compute_divergence(session: "Session") -> Tuple[float, float]:
    """Divergence signals (delta_H, delta_A) of a session.

    delta_H is the mean over observed source states of KL(T' || T_H), i.e. the
    distance to the human prototype; delta_A is the same against the agent
    prototype. A state absent from a prototype is compared with an empty row.

    Returns:
        ``(delta_H, delta_A)``, or ``(0.5, 0.5)`` for a session without transitions.
    """
    kernel = build_kernel(session.events)
    if not kernel:
        return 0.5, 0.5

    n_states = len(kernel)
    to_human = 0
    to_agent = 0
    for state, row in kernel.items():
        to_human += kl_div(row, TRANS_H.get(state, {}))
        to_agent += kl_div(row, TRANS_A.get(state, {}))
    return to_human / n_states, to_agent / n_states
def estimate_alpha(session: "Session", beta: float = 2.0) -> float:
    """Per-session contamination estimate alpha_hat = sigma(beta * (delta_H - delta_A)).

    The further the session sits from the human prototype relative to the
    agent prototype, the closer the result is to 1. ``beta`` sets the
    steepness of the sigmoid.

    Returns:
        Value in [0, 1]; 0.5 when both divergences sum to zero or less.
    """
    delta_h, delta_a = compute_divergence(session)
    if (delta_h + delta_a) <= 0:
        # no usable signal
        return 0.5
    logit = beta * (delta_h - delta_a)
    return 1.0 / (1.0 + np.exp(-logit))

View File

@@ -0,0 +1,219 @@
"""Minimal implementation of thesis pricing system.
Implements the core loop: prices -> sessions -> demand -> prices
with behavioral separability and robust pricing objective.
Objects:
- Session trajectories tau_s from mixture of H/A behavioral profiles
- Demand proxy q_hat via weighted action aggregation
- COI leakage penalty for agent reconnaissance
- Limbo: alternating price/demand history for trajectory analysis
"""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Dict, List, Tuple
import numpy as np
from .coi import COIWindow, compute_coi_window
from .separability import TRANS_H, TRANS_A, kl_div, build_kernel, compute_divergence, estimate_alpha
# Per-action weights omega(a): compute_demand sums them into the demand proxy q_hat and
# sample_trajectory adds them to its surge signal. Both call sites fall back to 0.1 for
# actions that have no entry here.
# NOTE(review): the imported kernels TRANS_H / TRANS_A emit the action "cart", which has no
# entry below (only "add_to_cart"), so cart events get the 0.1 fallback - less than "view"
# (0.15). Confirm whether "cart" was meant to carry the "add_to_cart" weight.
ACTION_WEIGHTS = {"add_to_cart": 0.8, "checkout": 0.9, "purchase": 1.0, "view": 0.15, "detail": 0.25, "hover": 0.3, "start": 0.05, "end": 0.0}
# One recorded action inside a session trajectory.
@dataclass
class Event:
# action label; sample_trajectory stores the kernel state name here
action: str
# index of the product the action refers to
product_idx: int
# price in effect when the action was recorded
price_seen: float
# session-relative timestamp (sample_trajectory accumulates gamma-distributed gaps; units not stated)
ts: float
# A simulated or observed session: an ordered list of events plus its ground-truth label.
@dataclass
class Session:
# session id; put_prices_to_market formats it as "s0000", "s0001", ... and restarts on every call
sid: str
# ordered events of the trajectory
events: List[Event]
actor: str # H or A (ground truth label)
# behavioural parameters; put_prices_to_market stores "price_sens" and "base_conv" here
theta: Dict[str, float] = field(default_factory=dict)
def compute_demand(session: Session) -> float:
    """Demand proxy q_hat of a session: the summed action weights omega(a_k).

    Actions that are not listed in ``ACTION_WEIGHTS`` contribute 0.1.
    """
    q_hat = 0
    for event in session.events:
        q_hat += ACTION_WEIGHTS.get(event.action, 0.1)
    return q_hat
# Simulates one session on a single, randomly chosen product.
# Returns (events, pidx): the recorded events and the index of the chosen product.
# NOTE(review): indentation was lost in this view, so the nesting of the statements after
# `state = nxt` (whether the price/timestamp updates belong to the `if` or to the loop body)
# cannot be confirmed from here; the code is left untouched.
def sample_trajectory(rng: np.random.Generator, trans: Dict, prices: np.ndarray, costs: np.ndarray, theta: Dict[str, float],
is_agent: bool, session_noise: float = 0.02, surge: float = 0.08, max_mult: float = 1.8) -> Tuple[List[Event], int]:
"""Sample session trajectory from behavioral kernel."""
# pick the product uniformly at random
pidx = int(rng.integers(0, len(prices)))
# session-level base price: posted price with multiplicative Gaussian noise ...
cost, base = float(costs[pidx]), float(prices[pidx]) * (1.0 + rng.normal(0.0, session_noise))
# ... kept between 1% above cost and twice the posted price
base = float(np.clip(base, cost * 1.01, float(prices[pidx]) * 2.0))
price, signal, state, t = base, 0.0, "start", 0.0
events = []
# walk the kernel until "end" is reached or 30 events have been recorded
while state != "end" and len(events) < 30:
# states without a row in `trans` go straight to "end"
probs = trans.get(state, {"end": 1.0})
nxt = rng.choice(list(probs.keys()), p=list(probs.values()))
if nxt == "purchase": # purchase conversion check
# relative markup over cost, floored at zero
rel = max((price - cost) / (cost + 1e-6), 0.0)
# purchase probability decays exponentially with the markup; theta supplies both parameters
p_buy = float(np.clip(theta.get("base_conv", 0.2) * np.exp(-theta.get("price_sens", 2.0) * rel), 0.0, 1.0))
# a failed conversion ends the session instead of recording a purchase
if rng.random() > p_buy:
nxt = "end"
state = nxt
# "start" and "end" are never recorded as events
if state not in {"start", "end"}:
events.append(Event(action=state, product_idx=pidx, price_seen=float(price), ts=t))
# engagement signal grows with each action's weight (0.1 for actions not in ACTION_WEIGHTS)
signal += float(ACTION_WEIGHTS.get(state, 0.1))
# the shown price surges with the signal, capped at max_mult times the base price
price = float(np.clip(base * (1.0 + surge * signal), cost * 1.01, base * max_mult))
# inter-event gap of at least 0.2, drawn from a different gamma distribution for agents
t += max(0.2, rng.gamma(1.5, 0.8) if is_agent else rng.gamma(2.0, 1.2))
return events, pidx
def put_prices_to_market(prices: np.ndarray, costs: np.ndarray, alpha: float = 0.2, n_sessions: int = 50,
                         seed: int | None = None) -> Tuple[List[Session], Dict[str, float]]:
    """Simulate ``n_sessions`` sessions from the human/agent mixture.

    Each session is an agent with probability ``alpha``. Agents get low price
    sensitivity and a fixed 0.01 base conversion; humans get higher, randomly
    drawn values for both.

    Returns:
        ``(sessions, demand)`` where ``demand`` maps each session id to its
        demand proxy q_hat.
    """
    rng = np.random.default_rng(seed)
    sessions: List[Session] = []
    demand: Dict[str, float] = {}

    for i in range(n_sessions):
        sid = f"s{i:04d}"
        is_agent = rng.random() < alpha
        # the draw order below is part of the seeded behaviour
        if is_agent:
            trans, actor = TRANS_A, "A"
            theta = {"price_sens": rng.uniform(0.05, 0.2), "base_conv": 0.01}
        else:
            trans, actor = TRANS_H, "H"
            theta = {"price_sens": rng.uniform(1.5, 4.0), "base_conv": rng.uniform(0.2, 0.5)}
        events, _ = sample_trajectory(rng, trans, prices, costs=costs, theta=theta, is_agent=is_agent)
        session = Session(sid=sid, events=events, actor=actor, theta=theta)
        sessions.append(session)
        demand[sid] = compute_demand(session)

    return sessions, demand
# One entry of the Limbo history.
@dataclass
class LimboUpdate:
utype: str # "prices" or "demand"
# payload: a price vector for "prices", a sid -> q_hat mapping for "demand"
data: np.ndarray | Dict[str, float]
# insertion counter assigned by Limbo.add_update
t: int
class Limbo:
    """Append-only log of price and demand updates, in arrival order."""

    def __init__(self):
        self.history: List[LimboUpdate] = []
        self._t = 0

    def add_update(self, utype: str, data: np.ndarray | Dict[str, float]) -> Dict:
        """Record an update and return a hint naming the next expected action."""
        entry = LimboUpdate(utype=utype, data=data, t=self._t)
        self.history.append(entry)
        self._t += 1
        if utype == "prices":
            next_action = "observe_demand"
        else:
            next_action = "set_prices"
        return {"action": next_action}

    def _payloads(self, utype: str) -> list:
        """Payloads of all updates of the given type, oldest first."""
        return [update.data for update in self.history if update.utype == utype]

    def get_prices_history(self) -> List[np.ndarray]:
        """All recorded price vectors, oldest first."""
        return self._payloads("prices")

    def get_demand_history(self) -> List[Dict[str, float]]:
        """All recorded demand mappings, oldest first."""
        return self._payloads("demand")
class System:
    """Main pricing system implementing robust Stackelberg objective.

    Manages the alternating loop: set prices p_t -> observe demand Q_hat(p_t) ->
    estimate contamination alpha from behavioral signals -> compute next prices.
    """

    def __init__(self, n_products: int = 10, costs: np.ndarray | None = None, lambda_coi: float = 0.5, seed: int | None = 42):
        """Create the system.

        Args:
            n_products: Number of products.
            costs: Unit costs; drawn uniformly from [10, 50) when omitted.
            lambda_coi: Weight of the COI leak penalty in the objective.
            seed: Seed of the internal random generator.
        """
        self.n = n_products
        self.rng = np.random.default_rng(seed)
        self.costs = costs if costs is not None else self.rng.uniform(10, 50, n_products)
        # reference prices sit 20-50% above cost
        self.refs = self.costs * (1 + self.rng.uniform(0.2, 0.5, n_products))
        self.lambda_coi = lambda_coi
        self.limbo = Limbo()
        self._alpha_est = 0.2
        self._sessions: List[Session] = []       # every session observed so far
        self._last_sessions: List[Session] = []  # sessions of the latest market draw only
        self._last_coi: COIWindow | None = None

    @property
    def alpha(self) -> float:
        """Current contamination estimate alpha_hat."""
        return self._alpha_est

    def _estimate_alpha_from_sessions(self) -> float:
        """Mean per-session alpha estimate over the 50 most recent sessions."""
        if not self._sessions:
            return self._alpha_est
        return float(np.mean([estimate_alpha(s) for s in self._sessions[-50:]]))

    def _revenue_under_demand(self, prices: np.ndarray, demand: Dict[str, float]) -> float:
        """Price-weighted demand: each session's q_hat is attributed to the product of its first event.

        Session ids restart at "s0000" on every market draw, so they are only
        unique within one draw. The lookup therefore uses the latest draw
        (falling back to the full history, most recent session winning, when
        no draw has been recorded) instead of the first match in the history.
        """
        candidates = self._last_sessions or self._sessions
        by_sid = {s.sid: s for s in candidates}
        agg = np.zeros(self.n)
        for sid, q in demand.items():
            sess = by_sid.get(sid)
            if sess is not None and sess.events:
                agg[sess.events[0].product_idx] += q
        return float(np.dot(prices, agg))

    def _compute_coi_window(self, demand: Dict[str, float]) -> COIWindow:
        """COI window of the latest sessions; an all-zero window before any sessions exist."""
        if not self._last_sessions:
            zeros = np.zeros(self.n, dtype=float)
            return COIWindow(policy=0.0, agent=0.0, leak=0.0, survival_ratio=0.0,
                             policy_by_product=zeros, agent_by_product=zeros, demand_weights=zeros)
        return compute_coi_window(self._last_sessions, self.costs, demand_mapping=demand)

    def _objective(self, prices: np.ndarray, demand: Dict[str, float]) -> float:
        """Robust objective: R(p,d) - lambda * COI_leak.

        Also stores the computed window in ``self._last_coi``. The profit term
        subtracts the sum of all unit costs once, independent of demand.
        """
        profit = self._revenue_under_demand(prices, demand) - float(np.sum(self.costs))
        self._last_coi = self._compute_coi_window(demand)
        return profit - self.lambda_coi * self._last_coi.leak

    def compute_prices(self, demand: Dict[str, float] | None = None) -> np.ndarray:
        """Compute next prices via heuristic margin adjustment based on alpha estimate.

        ``demand`` is accepted but not used by the heuristic. The result is
        logged to Limbo as a "prices" update.
        """
        self._alpha_est = self._estimate_alpha_from_sessions()
        margin_scale = 1.0 - 0.5 * self._alpha_est  # defensive pricing under high contamination
        margins = (self.refs - self.costs) * margin_scale
        noise = self.rng.normal(0, 0.02, self.n) * self.costs
        prices = np.clip(self.costs + margins + noise, self.costs * 1.02, self.refs * 1.3)
        self.limbo.add_update("prices", prices)
        return prices

    def observe_demand(self, prices: np.ndarray, alpha_true: float = 0.2, n_sessions: int = 50) -> Dict[str, float]:
        """Draw sessions at ``prices``, remember them and log the demand mapping to Limbo."""
        sessions, demand_map = put_prices_to_market(prices, costs=self.costs, alpha=alpha_true,
                                                    n_sessions=n_sessions, seed=int(self.rng.integers(0, 10000)))
        self._last_sessions = sessions
        self._sessions.extend(sessions)
        self.limbo.add_update("demand", demand_map)
        return demand_map

    def step(self, alpha_true: float = 0.2, n_sessions: int = 50) -> Tuple[np.ndarray, Dict[str, float], float, COIWindow]:
        """One loop iteration; returns ``(prices, demand, reward, coi_window)``."""
        demand_hist = self.limbo.get_demand_history()
        prices = self.compute_prices(demand_hist[-1] if demand_hist else None)
        demand = self.observe_demand(prices, alpha_true, n_sessions)
        reward = self._objective(prices, demand)
        return prices, demand, reward, self._last_coi or self._compute_coi_window(demand)

    def run(self, n_steps: int = 100, alpha_true: float = 0.2) -> Dict:
        """Run ``n_steps`` iterations and return the per-step trajectory as a dict of lists."""
        traj = {"prices": [], "demand": [], "rewards": [], "alpha_est": [], "alpha_true": alpha_true,
                "coi_policy": [], "coi_agent": [], "coi_leak": [], "coi_survival": []}
        for _ in range(n_steps):
            p, d, r, coi = self.step(alpha_true)
            traj["prices"].append(p)
            traj["demand"].append(d)
            traj["rewards"].append(r)
            traj["alpha_est"].append(self._alpha_est)
            traj["coi_policy"].append(coi.policy)
            traj["coi_agent"].append(coi.agent)
            traj["coi_leak"].append(coi.leak)
            traj["coi_survival"].append(coi.survival_ratio)
        return traj
if __name__ == "__main__":
    # 1) end-to-end smoke run of the pricing loop
    system = System(n_products=5, seed=42)
    traj = system.run(n_steps=20, alpha_true=0.25)
    avg_reward = np.mean(traj['rewards'])
    avg_policy = np.mean(traj['coi_policy'])
    avg_agent = np.mean(traj['coi_agent'])
    avg_leak = np.mean(traj['coi_leak'])
    print(f"avg reward: {avg_reward:.2f}, final alpha_hat: {traj['alpha_est'][-1]:.3f}, "
          f"COI_policy: {avg_policy:.3f}, COI_agent: {avg_agent:.3f}, leak: {avg_leak:.3f}")

    # 2) one market draw at hand-picked prices
    prices = np.array([20.0, 35.0, 50.0, 25.0, 40.0])
    costs = np.array([15.0, 28.0, 40.0, 18.0, 30.0])
    sessions, demand = put_prices_to_market(prices, costs=costs, alpha=0.3, n_sessions=20, seed=123)
    n_agent_sessions = sum(1 for s in sessions if s.actor == "A")
    print(f'sessions: {len(sessions)}, agents: {n_agent_sessions}')

    # 3) theoretical: erosion = 1 - 2/(N+1) for uniform order statistic
    for n in (1, 5, 10, 50, 100):
        erosion = 1.0 - 2.0 / (n + 1)
        print(f'N={n:3d} agents -> COI erosion: {erosion:.3f}')

    # 4) alpha_hat for a hand-written human-like and agent-like trajectory
    events = [Event('view', 0, 20.0, 0.1), Event('detail', 0, 20.0, 0.5),
              Event('cart', 0, 20.0, 1.0), Event('purchase', 0, 20.0, 2.0)]
    human_like = Session(sid="test", events=events, actor="H")
    print(f'human-like session alpha_hat: {estimate_alpha(human_like):.3f}')
    events_a = [Event('view', 0, 20.0, 0.1), Event('detail', 0, 20.0, 0.2),
                Event('view', 0, 20.0, 0.3), Event('detail', 0, 20.0, 0.4)]
    agent_like = Session(sid="test2", events=events_a, actor="A")
    print(f'agent-like session alpha_hat: {estimate_alpha(agent_like):.3f}')

View File

@@ -0,0 +1,249 @@
"""Gymnasium-compatible RL environment for thesis pricing system.
Wraps simplified.System with standard Gym interface for training pricing policies.
Supports multiple reward modes and contamination scenarios.
Action: price multipliers [0.5, 1.5] applied to reference prices
Observation: [prices/refs, demand shares, alpha_est, alpha_true, relative margins, t/max_steps]
Reward: configurable objective (revenue, profit, robust, coi-aware)
"""
from __future__ import annotations
from dataclasses import dataclass
from typing import Any, Dict, Tuple
import numpy as np
try:
import gymnasium as gym
from gymnasium import spaces
HAS_GYM = True
except ImportError:
HAS_GYM = False
from .simplified import System, Session, Event, Limbo, put_prices_to_market, compute_demand, estimate_alpha
from .coi import COIWindow, compute_coi_window, coi_erosion
# Configuration of PricingEnv and its variants.
@dataclass
class EnvConfig:
# number of products (length of the action vector)
n_products: int = 5
# an episode terminates once this many steps have been taken
max_steps: int = 200
# sessions simulated per environment step
sessions_per_step: int = 30
# true contamination level the episode starts with
alpha_true: float = 0.2
# NOTE(review): not read anywhere in this module's visible code - confirm where drift is applied
alpha_drift: float = 0.0
# clipping range for the true alpha; used by AdversarialEnv
alpha_bounds: Tuple[float, float] = (0.0, 0.6)
# weight of the COI leak penalty ("robust" and "coi_aware" rewards)
lambda_coi: float = 0.5
# weight of the price-volatility penalty ("robust" and "coi_aware" rewards)
lambda_vol: float = 0.1
reward_mode: str = "robust" # revenue | profit | robust | coi_aware
# divide the reward by the sum of reference prices
normalize_reward: bool = True
# default seed used by reset() when none is passed
seed: int | None = 42
def aggregate_purchases(sessions: list[Session], n_products: int, costs: np.ndarray) -> Tuple[np.ndarray, float, float]:
    """Sum up completed purchases over a batch of sessions.

    Only events whose action is "purchase" and whose product index lies in
    ``[0, n_products)`` are counted.

    Returns:
        ``(counts, revenue, cost)``: purchases per product, total of the
        prices seen at purchase, and total unit cost of the purchased items.
    """
    purchases = np.zeros(n_products, dtype=float)
    revenue = 0.0
    cost = 0.0
    bought = (
        event
        for session in sessions
        for event in session.events
        if event.action == "purchase" and 0 <= event.product_idx < n_products
    )
    for event in bought:
        idx = event.product_idx
        purchases[idx] += 1.0
        revenue += float(event.price_seen)
        cost += float(costs[idx])
    return purchases, revenue, cost
class PricingEnv(gym.Env if HAS_GYM else object):
    """RL environment for dynamic pricing under agent contamination.

    Platform sets prices p_t, market responds with mixture demand Q(p) = (1-alpha)*D_H + alpha*D_A.
    Agent estimates contamination alpha_hat from behavioral signals.
    Reward balances profit vs COI leakage.

    Actions are per-product multipliers in [0.5, 1.5] applied to the reference
    prices. The observation concatenates prices/refs, demand shares,
    [alpha_hat, alpha_true], relative margins and t/max_steps.

    NOTE(review): ``cfg.alpha_drift`` is not applied anywhere in this class,
    and the true alpha is part of the observation - confirm both are intended.
    """
    metadata = {"render_modes": ["human", "ansi"]}

    def __init__(self, cfg: EnvConfig | None = None):
        """Build the spaces; the underlying System is created by reset().

        Raises:
            ImportError: if gymnasium is not installed.
        """
        if not HAS_GYM:
            raise ImportError("gymnasium required")
        self.cfg = cfg or EnvConfig()
        self.n = self.cfg.n_products
        self._sys: System | None = None
        self._t = 0
        self._alpha = self.cfg.alpha_true
        self._last_prices: np.ndarray | None = None
        self._last_demand: Dict[str, float] | None = None
        self._episode_rewards: list[float] = []
        self._demand_agg = np.zeros(self.n)
        self.action_space = spaces.Box(low=0.5, high=1.5, shape=(self.n,), dtype=np.float32)
        obs_dim = self.n + self.n + 1 + 1 + self.n + 1  # prices + demand + alpha_hat + alpha + margins + t
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(obs_dim,), dtype=np.float32)

    def _build_obs(self) -> np.ndarray:
        """Assemble the float32 observation; all zeros before the first reset()."""
        if self._sys is None:
            return np.zeros(self.observation_space.shape[0], dtype=np.float32)
        prices = self._last_prices if self._last_prices is not None else self._sys.refs
        return np.concatenate([
            prices / (self._sys.refs + 1e-6),
            self._demand_agg / (np.sum(self._demand_agg) + 1e-6),
            [self._sys.alpha, self._alpha],
            (prices - self._sys.costs) / (self._sys.costs + 1e-6),
            [self._t / self.cfg.max_steps],
        ]).astype(np.float32)

    def _compute_reward(self, prices: np.ndarray, demand: Dict[str, float]) -> float:
        """Reward for the current step according to ``cfg.reward_mode``.

        Also refreshes ``self._demand_agg`` (per-product demand proxy), which
        feeds the next observation. Unknown reward modes fall back to profit.
        """
        cfg, sys = self.cfg, self._sys
        if sys is None:
            return 0.0
        # aggregate demand per product
        # Session ids restart at "s0000" on every market draw, so they must be
        # resolved against the sessions of the current step, not the full history
        # (the first match there belongs to the first step of the episode).
        current = {s.sid: s for s in sys._last_sessions}
        agg = np.zeros(self.n)
        for sid, q in demand.items():
            sess = current.get(sid)
            if sess is not None and sess.events:
                agg[sess.events[0].product_idx] += q
        self._demand_agg = agg
        _, revenue, cost = aggregate_purchases(sys._last_sessions, self.n, sys.costs)
        profit = revenue - cost
        vol_penalty = 0.0
        if self._last_prices is not None:
            # mean absolute price change relative to the reference prices
            vol_penalty = cfg.lambda_vol * float(np.mean(np.abs(prices - self._last_prices) / (sys.refs + 1e-6)))
        coi = compute_coi_window(sys._last_sessions, sys.costs, demand_mapping=demand)
        leak = float(coi.leak)
        reward_fns = {
            "revenue": lambda: revenue,
            "profit": lambda: profit,
            "robust": lambda: profit - cfg.lambda_coi * leak - vol_penalty,
            "coi_aware": lambda: profit - cfg.lambda_coi * (1 + 2 * sys.alpha) * leak - vol_penalty,
        }
        r = reward_fns.get(cfg.reward_mode, lambda: profit)()
        return float(r / (float(np.sum(sys.refs)) + 1e-6)) if cfg.normalize_reward else float(r)

    def reset(self, seed: int | None = None, options: dict | None = None) -> Tuple[np.ndarray, dict]:
        """Start a new episode with a fresh System; ``cfg.seed`` is used when ``seed`` is None."""
        seed = seed if seed is not None else self.cfg.seed
        self._sys = System(n_products=self.n, lambda_coi=self.cfg.lambda_coi, seed=seed)
        self._t, self._alpha = 0, self.cfg.alpha_true
        self._last_prices, self._last_demand = None, None
        self._episode_rewards, self._demand_agg = [], np.zeros(self.n)
        return self._build_obs(), {"alpha_true": self._alpha, "alpha_est": self._sys.alpha,
                                   "costs": self._sys.costs.copy(), "refs": self._sys.refs.copy()}

    def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, bool, bool, dict]:
        """Apply price multipliers, simulate one batch of sessions and score it.

        Returns:
            ``(obs, reward, terminated, truncated, info)``; ``terminated`` turns
            True once ``cfg.max_steps`` steps were taken, ``truncated`` is always False.

        Raises:
            RuntimeError: if reset() has not been called.
        """
        if self._sys is None:
            raise RuntimeError("call reset() first")
        action = np.clip(action, 0.5, 1.5)
        prices = np.clip(self._sys.refs * action.astype(np.float64), self._sys.costs * 1.01, self._sys.refs * 2.0)
        # log prices before the demand they produce, matching System.step
        self._sys.limbo.add_update("prices", prices)
        demand = self._sys.observe_demand(prices, alpha_true=self._alpha, n_sessions=self.cfg.sessions_per_step)
        self._sys._alpha_est = self._sys._estimate_alpha_from_sessions()
        reward = self._compute_reward(prices, demand)
        self._episode_rewards.append(reward)
        self._last_prices, self._last_demand = prices.copy(), demand
        self._t += 1
        # compute info metrics using shared helper
        purchases, revenue, cost = aggregate_purchases(self._sys._last_sessions, self.n, self._sys.costs)
        # expected agent count implied by alpha, not the sampled count
        n_agents = int(self._alpha * self.cfg.sessions_per_step)
        coi = compute_coi_window(self._sys._last_sessions, self._sys.costs, demand_mapping=demand)
        info = {
            "alpha_true": self._alpha, "alpha_est": self._sys.alpha,
            "alpha_error": abs(self._alpha - self._sys.alpha),
            "revenue": float(revenue), "profit": float(revenue - cost), "cost": float(cost),
            "n_purchases": int(np.sum(purchases)),
            "avg_margin": float(np.mean((prices - self._sys.costs) / self._sys.costs)),
            "n_sessions": len(demand), "n_agents": n_agents, "price_std": float(np.std(prices)),
            "coi_erosion": coi_erosion(coi.policy, coi.agent),
            "coi_policy": float(coi.policy), "coi_agent": float(coi.agent),
            "coi_leakage": float(coi.leak), "coi_survival": float(coi.survival_ratio),
            "cumulative_reward": sum(self._episode_rewards), "step": self._t,
        }
        return self._build_obs(), reward, self._t >= self.cfg.max_steps, False, info

    def render(self, mode: str = "human") -> str | None:
        """Return a one-line status string (printed too in "human" mode); None before the first step."""
        if self._sys is None or self._last_prices is None:
            return None
        out = f"t={self._t}/{self.cfg.max_steps} | alpha_true={self._alpha:.3f} alpha_hat={self._sys.alpha:.3f} | " \
              f"prices: {self._last_prices.round(1)} | demand: {self._demand_agg.round(2)} | " \
              f"reward: {self._episode_rewards[-1] if self._episode_rewards else 0:.3f}"
        if mode == "human":
            print(out)
        return out

    def close(self) -> None:
        """Nothing to release."""
        pass
# PricingEnv variant whose true contamination level comes from a fixed schedule.
class ContaminationSweepEnv(PricingEnv):
"""Environment that sweeps through contamination levels during training."""
def __init__(self, cfg: EnvConfig | None = None, alpha_schedule: list[float] | None = None):
super().__init__(cfg)
# default schedule is used when none (or an empty list) is supplied
self._schedule = alpha_schedule or [0.1, 0.2, 0.3, 0.4, 0.5]
self._schedule_idx = 0
def reset(self, seed: int | None = None, options: dict | None = None) -> Tuple[np.ndarray, dict]:
# options={"advance_schedule": True} moves to the next level, wrapping around at the end
if options and options.get("advance_schedule", False):
self._schedule_idx = (self._schedule_idx + 1) % len(self._schedule)
# writes the scheduled level into the cfg object, which the parent reset() then reads
# NOTE(review): indentation was lost in this view; it is unclear whether this assignment runs
# on every reset or only when the schedule advances - confirm against the repository.
self.cfg.alpha_true = self._schedule[self._schedule_idx]
return super().reset(seed, options)
# PricingEnv variant in which the true contamination level reacts to how stable prices are.
class AdversarialEnv(PricingEnv):
"""Environment with adversarial contamination dynamics.
Contamination increases when prices are predictable (agents exploit).
"""
def __init__(self, cfg: EnvConfig | None = None, exploitation_rate: float = 0.02):
super().__init__(cfg)
# scale of the per-step alpha increase
self._exploit_rate = exploitation_rate
# price vectors of the current episode, one per step
self._price_history: list[np.ndarray] = []
def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, bool, bool, dict]:
# the parent step runs first, so the alpha update below affects the following step
obs, reward, term, trunc, info = super().step(action)
if self._last_prices is not None:
self._price_history.append(self._last_prices.copy())
predictability = 0.0
# a predictability score is only computed once more than 10 price vectors exist
if len(self._price_history) > 10:
# inverse of the standard deviation over ALL entries of the last 10 price vectors,
# i.e. the spread across products and across time is pooled into one number
predictability = 1.0 / (float(np.std(self._price_history[-10:])) + 0.1)
# random-sized upward nudge of the true alpha, clipped to cfg.alpha_bounds; draws from the System rng
# NOTE(review): indentation was lost in this view; whether this update sits inside the
# `if` above (and so whether the rng is consumed during the first steps) needs confirming.
self._alpha = np.clip(self._alpha + self._exploit_rate * predictability * self._sys.rng.random(), *self.cfg.alpha_bounds)
info["predictability"] = predictability
return obs, reward, term, trunc, info
def reset(self, seed: int | None = None, options: dict | None = None) -> Tuple[np.ndarray, dict]:
# forget the previous episode's prices before delegating to the parent
self._price_history = []
return super().reset(seed, options)
def make_env(cfg: EnvConfig | None = None, env_type: str = "standard") -> PricingEnv:
    """Build an environment by type name.

    "sweep" gives a ContaminationSweepEnv, "adversarial" an AdversarialEnv;
    any other value (including the default "standard") gives a PricingEnv.
    """
    if env_type == "sweep":
        env_cls = ContaminationSweepEnv
    elif env_type == "adversarial":
        env_cls = AdversarialEnv
    else:
        env_cls = PricingEnv
    return env_cls(cfg)
# baseline policies
def fixed_price_policy(refs, margin=0.0):
    """One multiplier ``1 + margin`` per reference price."""
    return np.ones(len(refs), dtype=np.float32) * (1.0 + margin)


def random_policy(n, rng=None):
    """``n`` multipliers drawn uniformly from [0.7, 1.3); a fresh generator is made if none is given."""
    generator = rng or np.random.default_rng()
    return generator.uniform(0.7, 1.3, n).astype(np.float32)


def adaptive_policy(obs, n, base=0.1):
    """Shrink the base margin with the alpha estimate stored at ``obs[2 * n]``."""
    alpha_hat = obs[2 * n]
    return np.ones(n, dtype=np.float32) * (1.0 + base * (1.0 - 0.4 * alpha_hat))
if __name__ == "__main__":
    # Smoke test: roll the adaptive baseline through one episode.
    cfg = EnvConfig(n_products=100, max_steps=100, alpha_true=0.25, reward_mode="robust")
    env = make_env(cfg)
    obs, info = env.reset()
    print(f"initial: alpha={info['alpha_true']:.2f}")

    total_reward = 0.0
    for t in range(cfg.max_steps):
        obs, reward, done, _, info = env.step(adaptive_policy(obs, cfg.n_products))
        total_reward += reward
        # print a status line every tenth step
        if t % 10 == 0:
            env.render()
        if done:
            break

    print(f"\ntotal reward: {total_reward:.2f}, final alpha_hat: {info['alpha_est']:.3f}")

View File

@@ -0,0 +1,168 @@
"""Summarize TensorBoard logs into comparison tables."""
from __future__ import annotations
import json
import re
from pathlib import Path
from collections import defaultdict
from dataclasses import dataclass
import pandas as pd
try:
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
HAS_TB = True
except ImportError:
HAS_TB = False
# Metadata of one experiment run, parsed from its directory name.
@dataclass
class RunInfo:
# algorithm / policy label (e.g. "ppo", "fixed")
algo: str
# contamination level encoded after "_a" in the directory name
alpha: float
# reward mode suffix; parse_run_name defaults it to "robust" when absent
reward_mode: str
# run directory; parse_run_name leaves it as an empty Path and discover_runs fills it in
path: Path
def parse_run_name(name: str) -> RunInfo | None:
    """Extract algo, alpha, reward_mode from run directory name.

    Recognised patterns: ``ppo_a0.20_robust``, ``cmp_fixed_a0.20``,
    ``sac_a0.90_robust``. An optional ``cmp_`` prefix is dropped and a
    missing reward-mode suffix defaults to ``'robust'``.

    Returns:
        A ``RunInfo`` with an empty ``path``, or ``None`` when the name does
        not follow the pattern or its alpha part is not a valid number.
    """
    # patterns: ppo_a0.20_robust, cmp_fixed_a0.20, sac_a0.90_robust
    m = re.match(r'(cmp_)?(\w+)_a([\d.]+)_?(\w+)?', name)
    if not m:
        return None
    _prefix, algo, alpha_text, mode = m.groups()
    try:
        # [\d.]+ also matches strings such as "1.2.3" or "."; treat those as
        # "not a run directory" instead of aborting the whole discovery
        alpha = float(alpha_text)
    except ValueError:
        return None
    return RunInfo(algo=algo, alpha=alpha, reward_mode=mode or 'robust', path=Path())
def load_tb_scalars(log_dir: Path, tags: list[str], reduce: str = 'last') -> dict[str, float]:
    """Load scalar values from TensorBoard event files.

    Args:
        log_dir: directory handed to EventAccumulator.
        tags: scalar tags to read; tags absent from the logs or without events are skipped.
        reduce: 'last', 'mean', 'max' or 'min'; any other value yields no entries.

    Returns:
        Mapping of tag to its reduced value; empty when TensorBoard is unavailable.
    """
    if not HAS_TB:
        return {}
    reducers = {
        'last': lambda xs: xs[-1],
        'mean': lambda xs: sum(xs) / len(xs),
        'max': max,
        'min': min,
    }
    combine = reducers.get(reduce)
    accumulator = EventAccumulator(str(log_dir))
    accumulator.Reload()
    scalars = {}
    for tag in tags:
        if tag not in accumulator.Tags().get('scalars', []):
            continue
        events = accumulator.Scalars(tag)
        if not events:
            continue
        values = [event.value for event in events]
        if combine is not None:
            scalars[tag] = combine(values)
    return scalars
def load_json_results(log_dir: Path) -> dict[str, float]:
    """Load metrics from results.json if available.

    Returns:
        The parsed JSON content of ``log_dir / 'results.json'``, or an empty
        dict when that file does not exist.
    """
    candidate = log_dir / 'results.json'
    if not candidate.exists():
        return {}
    with candidate.open() as handle:
        return json.load(handle)
def discover_runs(base_dir: Path) -> list[RunInfo]:
    """Find all experiment runs in base directory.

    Args:
        base_dir: directory whose immediate sub-directories are candidate runs.

    Returns:
        One RunInfo per sub-directory whose name parse_run_name accepts, with
        ``path`` set to that sub-directory, ordered by path. Empty when
        ``base_dir`` is missing or is not a directory.
    """
    # a missing directory means "no runs", not a crash
    if not base_dir.is_dir():
        return []
    runs = []
    # sorted() makes the result independent of filesystem enumeration order
    for d in sorted(base_dir.iterdir()):
        if not d.is_dir():
            continue
        info = parse_run_name(d.name)
        if info:
            info.path = d
            runs.append(info)
    return runs
def build_tables(runs: list[RunInfo], metrics: list[str], reduce: str = 'last') -> dict[str, dict[str, pd.DataFrame]]:
    """Build pivot tables: reward_mode -> metric -> DataFrame[alpha x algo].

    For every run the value of each metric is taken from results.json when a
    matching ``*_mean`` key exists, otherwise from the TensorBoard scalar.

    Args:
        runs: runs to tabulate (``path`` must point at the run directory).
        metrics: metric names; revenue/profit/margin map to ``economics/<m>``,
            erosion/leakage to ``coi/<m>``, anything else to ``alpha/<m>``.
        reduce: reduction forwarded to load_tb_scalars.

    Returns:
        Nested dict of DataFrames indexed by alpha with one column per algo.
    """
    # results.json stores some metrics under a prefixed name, so the bare
    # "<metric>_mean" key never matches them; try the prefixed key as well.
    json_aliases = {'margin': 'avg_margin', 'erosion': 'coi_erosion', 'leakage': 'coi_leakage',
                    'estimation_error': 'alpha_error'}
    # collect data: {reward_mode: {metric: {(alpha, algo): value}}}
    data = defaultdict(lambda: defaultdict(dict))
    tb_tags = [f'economics/{m}' if m in ['revenue', 'profit', 'margin'] else f'coi/{m}' if m in ['erosion', 'leakage'] else f'alpha/{m}' for m in metrics]
    tag_map = dict(zip(tb_tags, metrics))
    for run in runs:
        # try json first (final eval metrics)
        jm = load_json_results(run.path)
        tb = load_tb_scalars(run.path, tb_tags, reduce)
        for tag, metric in tag_map.items():
            json_keys = [f'{metric}_mean']
            if metric in json_aliases:
                json_keys.append(f'{json_aliases[metric]}_mean')
            val = next((jm[k] for k in json_keys if k in jm), None)
            if val is None and tag in tb:
                val = tb[tag]
            if val is not None:
                data[run.reward_mode][metric][(run.alpha, run.algo)] = val
    # convert to DataFrames
    tables = {}
    for mode, metrics_data in data.items():
        tables[mode] = {}
        for metric, vals in metrics_data.items():
            if not vals:
                continue
            alphas = sorted(set(a for a, _ in vals.keys()))
            algos = sorted(set(al for _, al in vals.keys()))
            df = pd.DataFrame(index=alphas, columns=algos, dtype=float)
            for (a, al), v in vals.items():
                df.loc[a, al] = v
            df.index.name = 'alpha'
            tables[mode][metric] = df
    return tables
def format_table(df: pd.DataFrame, fmt: str = '.3f') -> str:
    """Format DataFrame as a markdown table.

    Args:
        df: table to render.
        fmt: float format passed to ``DataFrame.to_markdown`` as ``floatfmt``.

    Returns:
        The markdown text produced by ``df.to_markdown``.
    """
    return df.to_markdown(floatfmt=fmt)
def summarize(base_dir: str = 'sim/case/thesis_simplified/runs',
              metrics: list[str] | None = None,
              reduce: str = 'last',
              output: str | None = None) -> dict:
    """Generate summary tables from experiment runs.

    Prints a markdown report (one section per reward mode, one sub-section per
    metric) and optionally writes it to ``output``.

    Returns:
        The tables built by build_tables, or an empty dict when no run is found.
    """
    root = Path(base_dir)
    if not metrics:
        metrics = ['revenue', 'profit', 'margin', 'erosion', 'leakage']
    found = discover_runs(root)
    if not found:
        print(f"No runs found in {root}")
        return {}
    print(f"Found {len(found)} runs")
    tables = build_tables(found, metrics, reduce)

    chunks = []
    for mode in sorted(tables):
        chunks.append(f"\n# Reward Mode: {mode}\n")
        per_metric = tables[mode]
        for metric in sorted(per_metric):
            chunks.append(f"\n## {metric}\n")
            chunks.append(format_table(per_metric[metric]))
            chunks.append("")
    report = '\n'.join(chunks)
    print(report)

    if output:
        Path(output).write_text(report)
        print(f"\nSaved to {output}")
    return tables
if __name__ == '__main__':
    # Command-line entry point: every flag maps one-to-one onto a summarize() argument.
    import argparse
    p = argparse.ArgumentParser()
    p.add_argument('--dir', default='sim/case/thesis_simplified/runs')
    p.add_argument('--metrics', nargs='+', default=['revenue', 'profit', 'margin', 'erosion', 'leakage'])
    p.add_argument('--reduce', default='last', choices=['last', 'mean', 'max', 'min'])
    p.add_argument('--output', '-o', help='save markdown to file')
    args = p.parse_args()
    summarize(args.dir, args.metrics, args.reduce, args.output)

View File

@@ -0,0 +1,336 @@
"""RL training for thesis pricing system with thesis-aligned metrics.
Trains pricing policies using stable-baselines3 with TensorBoard logging.
Tracks COI erosion, alpha estimation error, and economic KPIs per thesis formulation.
"""
from __future__ import annotations
import argparse
import json
from concurrent.futures import ProcessPoolExecutor, as_completed
from dataclasses import dataclass, asdict, field
from pathlib import Path
from typing import Dict, List, Callable, Any
import numpy as np
try:
from stable_baselines3 import PPO, SAC, A2C
from stable_baselines3.common.callbacks import BaseCallback, EvalCallback
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.monitor import Monitor
HAS_SB3 = True
except ImportError:
HAS_SB3 = False
try:
from torch.utils.tensorboard import SummaryWriter
HAS_TB = True
except ImportError:
HAS_TB = False
from .simplified_env import PricingEnv, EnvConfig, make_env, adaptive_policy, fixed_price_policy, random_policy
@dataclass
class EpisodeMetrics:
    """Running per-episode sums of the quantities reported in step ``info`` dicts."""
    reward: float = 0.0
    revenue: float = 0.0
    profit: float = 0.0
    coi_erosion: float = 0.0
    coi_leakage: float = 0.0
    alpha_error: float = 0.0
    avg_margin: float = 0.0
    n_agents: int = 0
    steps: int = 0

    def accumulate(self, info: Dict[str, Any]) -> None:
        """Add one step's ``info`` values to the totals; missing keys count as 0."""
        self.steps += 1
        for name in ('reward', 'revenue', 'profit', 'coi_erosion', 'coi_leakage', 'avg_margin', 'n_agents'):
            setattr(self, name, getattr(self, name) + info.get(name, 0))
        # alpha_error is the absolute gap between the true and the estimated alpha
        gap = info.get('alpha_true', 0) - info.get('alpha_est', 0)
        self.alpha_error += abs(gap)

    def normalized(self) -> Dict[str, float]:
        """Return per-step averages (totals divided by ``steps``, at least 1); reward is excluded."""
        denom = max(self.steps, 1)
        averaged = {}
        for name in ['revenue', 'profit', 'coi_erosion', 'coi_leakage', 'alpha_error', 'avg_margin', 'n_agents']:
            averaged[name] = getattr(self, name) / denom
        return averaged
@dataclass
class ExperimentConfig:
    """Settings for one training / baseline experiment."""
    algo: str = "ppo"
    total_timesteps: int = 100_000
    n_envs: int = 4
    eval_freq: int = 5000
    n_eval_episodes: int = 10
    log_dir: str = "sim/case/thesis_simplified/runs"
    seed: int = 42
    n_products: int = 10
    max_steps: int = 200
    alpha_true: float = 0.2
    reward_mode: str = "robust"
    experiment_name: str | None = None

    def __post_init__(self):
        """Derive ``experiment_name`` from algo, alpha and reward mode unless one was given."""
        if self.experiment_name is not None:
            return
        self.experiment_name = f"{self.algo}_a{self.alpha_true:.2f}_{self.reward_mode}"
class Policy:
    """Unified policy interface for baselines and trained models."""

    def __init__(self, policy_fn: Callable[[np.ndarray, int], np.ndarray], name: str):
        self._fn = policy_fn
        self.name = name

    def predict(self, obs: np.ndarray, deterministic: bool = True) -> tuple[np.ndarray, None]:
        """Return ``(action, None)``; the product count is derived as ``(len(obs) - 3) // 3``."""
        n_products = (len(obs) - 3) // 3
        return self._fn(obs, n_products), None

    @staticmethod
    def fixed(margin: float = 0.15) -> "Policy":
        """Constant-margin baseline built on fixed_price_policy."""
        def _fixed(obs, n):
            return fixed_price_policy(np.ones(n), margin)
        return Policy(_fixed, f"fixed_{margin:.2f}")

    @staticmethod
    def adaptive(base_margin: float = 0.15) -> "Policy":
        """Baseline built on adaptive_policy with the given base margin."""
        def _adaptive(obs, n):
            return adaptive_policy(obs, n, base_margin)
        return Policy(_adaptive, f"adaptive_{base_margin:.2f}")

    @staticmethod
    def random() -> "Policy":
        """Baseline built on random_policy."""
        def _random(obs, n):
            return random_policy(n)
        return Policy(_random, "random")

    @staticmethod
    def myopic(greed: float = 0.3) -> "Policy":
        """Baseline that scales one shared multiplier with the mean of ``obs[n:2n]``."""
        def _fn(obs: np.ndarray, n: int) -> np.ndarray:
            if len(obs) > 2 * n:
                demand_norm = obs[n:2 * n]
            else:
                demand_norm = np.ones(n) * 0.5
            multiplier = np.clip(1.0 + greed * (1 + np.mean(demand_norm)), 0.5, 1.5)
            return np.ones(n, dtype=np.float32) * multiplier
        return Policy(_fn, f"myopic_{greed:.1f}")
def log_metrics(writer: SummaryWriter | None, metrics: Dict[str, float], prefix: str, step: int) -> None:
    """Write every metric as the scalar ``<prefix>/<name>`` at ``step``; no-op when ``writer`` is None."""
    if writer is not None:
        for key in metrics:
            writer.add_scalar(f'{prefix}/{key}', metrics[key], step)
class MetricsCallback(BaseCallback):
    """Training callback that mirrors per-step ``info`` values into TensorBoard scalars."""

    def __init__(self, writer: SummaryWriter | None, verbose: int = 0):
        super().__init__(verbose)
        self._writer = writer

    def _on_step(self) -> bool:
        """Log every entry of ``self.locals['infos']`` at the current timestep; always returns True."""
        writer = self._writer
        if writer is not None:
            # (scalar tag, info key) pairs that are copied through unchanged
            direct = (
                ('economics/revenue', 'revenue'),
                ('economics/profit', 'profit'),
                ('economics/margin', 'avg_margin'),
                ('coi/erosion', 'coi_erosion'),
                ('coi/leakage', 'coi_leakage'),
            )
            for info in self.locals.get('infos', []):
                step = self.num_timesteps
                for tag, key in direct:
                    writer.add_scalar(tag, info.get(key, 0), step)
                alpha_gap = abs(info.get('alpha_true', 0) - info.get('alpha_est', 0))
                writer.add_scalar('alpha/estimation_error', alpha_gap, step)
                writer.add_scalar('agents/count', info.get('n_agents', 0), step)
        return True
def make_vec_env(cfg: ExperimentConfig, n_envs: int = 1) -> DummyVecEnv:
    """Build a DummyVecEnv of ``n_envs`` Monitor-wrapped environments, all created from ``cfg``."""
    def _factory():
        env_cfg = EnvConfig(
            n_products=cfg.n_products,
            max_steps=cfg.max_steps,
            alpha_true=cfg.alpha_true,
            reward_mode=cfg.reward_mode,
            seed=cfg.seed,
        )
        return Monitor(make_env(env_cfg))

    factories = [_factory for _ in range(n_envs)]
    return DummyVecEnv(factories)
def run_episodes(policy: Policy | Any, env: PricingEnv, n_episodes: int) -> List[EpisodeMetrics]:
    """Run policy for n episodes and collect metrics.

    Each episode steps until the environment reports termination or truncation.
    The step reward is added to ``EpisodeMetrics.reward`` on top of whatever
    ``accumulate`` reads from ``info``.
    """
    collected = []
    for _episode in range(n_episodes):
        obs, _ = env.reset()
        episode = EpisodeMetrics()
        while True:
            action, _ = policy.predict(obs, deterministic=True)
            obs, reward, terminated, truncated, info = env.step(action)
            episode.accumulate(info)
            episode.reward += reward
            if terminated or truncated:
                break
        collected.append(episode)
    return collected
def evaluate_policy(policy: Policy | Any, cfg: ExperimentConfig, n_episodes: int = 20) -> Dict[str, float]:
    """Roll out ``policy`` in a fresh environment (seed offset by 999) and summarise the episodes.

    Returns:
        ``reward_mean`` / ``reward_std`` over episode rewards, plus ``<k>_mean``
        for each per-step-normalised metric.
    """
    eval_cfg = EnvConfig(n_products=cfg.n_products, max_steps=cfg.max_steps,
                         alpha_true=cfg.alpha_true, reward_mode=cfg.reward_mode, seed=cfg.seed + 999)
    episodes = run_episodes(policy, make_env(eval_cfg), n_episodes)
    rewards = [ep.reward for ep in episodes]
    per_step = [ep.normalized() for ep in episodes]
    summary = {'reward_mean': np.mean(rewards), 'reward_std': np.std(rewards)}
    for key in ['revenue', 'profit', 'coi_erosion', 'coi_leakage', 'alpha_error', 'avg_margin']:
        summary[f'{key}_mean'] = np.mean([row[key] for row in per_step])
    return summary
def run_baseline(policy: Policy, vec_env: DummyVecEnv, total_steps: int, writer: SummaryWriter | None):
    """Roll a baseline policy through ``vec_env`` for ``total_steps`` transitions, logging as it goes.

    The step counter advances by the number of environments per iteration. Each
    environment's ``info`` is logged at that counter, and the accumulated
    episode reward is logged (then reset) whenever an environment reports done.
    """
    obs = vec_env.reset()
    n_envs = vec_env.num_envs
    running = np.zeros(n_envs)
    # (scalar tag, info key) pairs that are copied through unchanged
    direct = (
        ('economics/revenue', 'revenue'),
        ('economics/profit', 'profit'),
        ('economics/margin', 'avg_margin'),
        ('coi/erosion', 'coi_erosion'),
        ('coi/leakage', 'coi_leakage'),
    )
    for step in range(0, total_steps, n_envs):
        actions = np.array([policy.predict(obs[idx])[0] for idx in range(n_envs)])
        obs, rewards, dones, infos = vec_env.step(actions)
        running += rewards
        for idx, info in enumerate(infos):
            if writer:
                for tag, key in direct:
                    writer.add_scalar(tag, info.get(key, 0), step)
                alpha_gap = abs(info.get('alpha_true', 0) - info.get('alpha_est', 0))
                writer.add_scalar('alpha/estimation_error', alpha_gap, step)
                writer.add_scalar('agents/count', info.get('n_agents', 0), step)
            if not dones[idx]:
                continue
            if writer:
                writer.add_scalar('rollout/ep_reward', running[idx], step)
            running[idx] = 0
def train(cfg: ExperimentConfig) -> Dict[str, Any]:
    """Run one experiment (baseline rollout or SB3 training) and persist its artifacts.

    Writes ``config.json`` and ``results.json`` into ``<log_dir>/<experiment_name>``;
    for learned policies it also saves ``final_model`` and lets EvalCallback store
    the best model under ``best``.

    Args:
        cfg: experiment settings; ``cfg.algo`` selects a baseline
            (fixed/adaptive/random/myopic) or an SB3 algorithm (ppo/sac/a2c).

    Returns:
        ``{"path": <run directory as str>, "metrics": <evaluate_policy output>}``.

    Raises:
        ImportError: a learned algorithm was requested but stable-baselines3 is missing.
        KeyError: ``cfg.algo`` is neither a known baseline nor a known SB3 algorithm.
    """
    algo = cfg.algo.lower()
    baselines = {"fixed": Policy.fixed, "adaptive": Policy.adaptive, "random": Policy.random, "myopic": Policy.myopic}
    is_baseline = algo in baselines
    if not HAS_SB3 and not is_baseline:
        raise ImportError("stable-baselines3 required: pip install stable-baselines3[extra]")

    log_path = Path(cfg.log_dir) / cfg.experiment_name
    log_path.mkdir(parents=True, exist_ok=True)
    with open(log_path / "config.json", "w") as f:
        json.dump(asdict(cfg), f, indent=2)

    writer = SummaryWriter(log_path) if HAS_TB else None
    train_env = eval_env = None
    try:
        train_env = make_vec_env(cfg, cfg.n_envs)
        eval_env = make_vec_env(cfg, 1)
        if is_baseline:
            policy = baselines[algo]()
            run_baseline(policy, train_env, cfg.total_timesteps, writer)
            final_metrics = evaluate_policy(policy, cfg)
        else:
            common = dict(verbose=1, seed=cfg.seed, tensorboard_log=str(log_path), device="auto")
            # lambdas defer construction so only the selected algorithm is instantiated
            builders = {
                "ppo": lambda: PPO("MlpPolicy", train_env, learning_rate=3e-4, n_steps=2048, batch_size=64, n_epochs=10, gamma=0.99, gae_lambda=0.95, clip_range=0.2, ent_coef=0.01, **common),
                "sac": lambda: SAC("MlpPolicy", train_env, learning_rate=1e-4, buffer_size=50_000, batch_size=512, tau=0.02, gamma=0.99, learning_starts=1000, ent_coef="auto_0.1", train_freq=4, **common),
                "a2c": lambda: A2C("MlpPolicy", train_env, learning_rate=7e-4, n_steps=5, gamma=0.99, **common),
            }
            model = builders[algo]()
            cb = MetricsCallback(writer)
            eval_cb = EvalCallback(eval_env, best_model_save_path=str(log_path / "best"), log_path=str(log_path),
                                   eval_freq=cfg.eval_freq, n_eval_episodes=cfg.n_eval_episodes, deterministic=True)
            model.learn(cfg.total_timesteps, callback=[cb, eval_cb], progress_bar=True)
            model.save(log_path / "final_model")
            final_metrics = evaluate_policy(model, cfg)
        if writer:
            log_metrics(writer, final_metrics, 'final', cfg.total_timesteps)
    finally:
        # release the event writer and the environments even when training fails
        if writer:
            writer.close()
        for env in (train_env, eval_env):
            if env is not None:
                env.close()

    with open(log_path / "results.json", "w") as f:
        json.dump(final_metrics, f, indent=2)
    return {"path": str(log_path), "metrics": final_metrics}
def _train_alpha(args: tuple) -> tuple[str, Dict]:
    """Worker for parallel sweep - must be top-level for pickling.

    ``args`` is ``(cfg_dict, alpha)``; the dict is updated in place with the
    alpha value and a matching experiment name before training.

    Returns:
        ``("alpha_<alpha>", metrics)`` for the trained configuration.
    """
    cfg_dict, alpha = args
    run_name = f"{cfg_dict['algo']}_a{alpha:.2f}_{cfg_dict['reward_mode']}"
    cfg_dict.update(alpha_true=alpha, experiment_name=run_name)
    sweep_cfg = ExperimentConfig(**cfg_dict)
    print(f"[alpha={alpha:.2f}] starting")
    outcome = train(sweep_cfg)
    print(f"[alpha={alpha:.2f}] done")
    return f"alpha_{alpha:.2f}", outcome["metrics"]
def run_sweep(cfg: ExperimentConfig, alphas: List[float] | None = None, max_workers: int | None = None) -> Dict[str, Dict]:
    """Train one configuration per alpha value and save the combined metrics as JSON.

    Args:
        cfg: template configuration; alpha and experiment name are overridden per run.
        alphas: values to sweep; defaults to 0.0 .. 0.9 in steps of 0.1.
        max_workers: 1 runs sequentially in-process, anything else uses a process pool.

    Returns:
        Mapping of ``alpha_<value>`` to that run's metrics.
    """
    if not alphas:
        alphas = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
    base = asdict(cfg)
    results = {}
    if max_workers == 1:  # sequential fallback
        for alpha in alphas:
            key, metrics = _train_alpha((base.copy(), alpha))
            results[key] = metrics
    else:
        with ProcessPoolExecutor(max_workers=max_workers) as pool:
            pending = [pool.submit(_train_alpha, (base.copy(), alpha)) for alpha in alphas]
            for fut in as_completed(pending):
                key, metrics = fut.result()
                results[key] = metrics
    summary_path = Path(cfg.log_dir) / f"sweep_{cfg.algo}_{cfg.reward_mode}.json"
    with open(summary_path, "w") as f:
        json.dump(results, f, indent=2)
    print(f"\nSweep results saved to {summary_path}")
    return results
def _train_policy(args: tuple) -> tuple[str, Dict]:
    """Worker for parallel policy comparison.

    ``args`` is ``(cfg_dict, algo)``; the dict is updated in place with the
    algorithm and a ``cmp_``-prefixed experiment name before training.

    Returns:
        ``(algo, metrics)`` for the trained configuration.
    """
    cfg_dict, algo = args
    run_name = f"cmp_{algo}_a{cfg_dict['alpha_true']:.2f}"
    cfg_dict.update(algo=algo, experiment_name=run_name)
    cmp_cfg = ExperimentConfig(**cfg_dict)
    print(f"[{algo}] starting")
    outcome = train(cmp_cfg)
    print(f"[{algo}] done")
    return algo, outcome["metrics"]
def compare_policies(cfg: ExperimentConfig, policies: List[str] | None = None, max_workers: int | None = None) -> Dict[str, Dict]:
    """Run several policies under the same configuration, save and print their metrics.

    Args:
        cfg: template configuration; algo and experiment name are overridden per run.
        policies: algorithm names; defaults to the four baselines.
        max_workers: 1 runs sequentially in-process, anything else uses a process pool.

    Returns:
        Mapping of algorithm name to its metrics.
    """
    if not policies:
        policies = ["fixed", "adaptive", "myopic", "random"]
    base = asdict(cfg)
    results = {}
    if max_workers == 1:
        for name in policies:
            algo, metrics = _train_policy((base.copy(), name))
            results[algo] = metrics
    else:
        with ProcessPoolExecutor(max_workers=max_workers) as pool:
            pending = [pool.submit(_train_policy, (base.copy(), name)) for name in policies]
            for fut in as_completed(pending):
                algo, metrics = fut.result()
                results[algo] = metrics
    cmp_path = Path(cfg.log_dir) / f"compare_a{cfg.alpha_true:.2f}.json"
    with open(cmp_path, "w") as f:
        json.dump(results, f, indent=2)
    print(f"\nComparison saved to {cmp_path}")
    for algo in results:
        m = results[algo]
        print(f"  {algo:12s}: reward={m['reward_mean']:.2f}  coi_erosion={m['coi_erosion_mean']:.4f}  alpha_err={m['alpha_error_mean']:.4f}")
    return results
def main():
    """CLI entry point: parse arguments, build an ExperimentConfig and dispatch.

    ``--sweep`` runs run_sweep; ``--compare`` (checked only when ``--sweep`` is
    absent) runs compare_policies; otherwise a single train() run is executed
    and its path and metrics are printed.
    """
    parser = argparse.ArgumentParser(description="Train RL pricing policies")
    parser.add_argument("--algo", default="ppo", choices=["ppo", "sac", "a2c", "fixed", "adaptive", "random", "myopic"])
    parser.add_argument("--steps", type=int, default=100_000)
    parser.add_argument("--alpha", type=float, default=0.2)
    parser.add_argument("--reward-mode", default="robust", choices=["revenue", "profit", "robust", "coi_aware"])
    parser.add_argument("--n-products", type=int, default=10)
    parser.add_argument("--n-envs", type=int, default=4)
    parser.add_argument("--seed", type=int, default=42)
    parser.add_argument("--log-dir", default="sim/case/thesis_simplified/runs")
    parser.add_argument("--sweep", action="store_true", help="run contamination sweep")
    parser.add_argument("--compare", action="store_true", help="compare all baselines")
    parser.add_argument("--workers", type=int, default=None, help="max parallel workers for sweep (None=auto, 1=sequential)")
    args = parser.parse_args()
    # every CLI flag maps onto the ExperimentConfig field of the corresponding name
    cfg = ExperimentConfig(algo=args.algo, total_timesteps=args.steps, alpha_true=args.alpha,
                           reward_mode=args.reward_mode, n_products=args.n_products,
                           n_envs=args.n_envs, seed=args.seed, log_dir=args.log_dir)
    if args.sweep:
        run_sweep(cfg, max_workers=args.workers)
    elif args.compare:
        compare_policies(cfg, max_workers=args.workers)
    else:
        result = train(cfg)
        print(f"\nTraining complete: {result['path']}")
        print(f"Metrics: {json.dumps(result['metrics'], indent=2)}")
if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,97 @@
import os
import json
from pydantic import BaseModel as Base
class PayloadModel(Base):
    """Pydantic schema for one interaction event payload."""
    sessionId: str
    experimentId: str | None
    eventName: str
    page: str | None  # inspected by _is_admin to drop "/admin/" pages
    productId: str | None
    metadata: dict
    storeMode: str
    userAgent: str
    ts: str  # event timestamp kept as a string; exact format is not shown here — TODO confirm
class ValueModel(Base):
    """Pydantic schema for a record's ``value``: the event payload plus fields describing it."""
    payload: PayloadModel  # the interaction event itself
    encoding: str
    isPayloadNull: bool
    schemaId: int
    size: int
class InteractionModel(Base):
    """Pydantic schema for one record of a human session's ``int.json`` file."""
    # NOTE(review): field names look like an exported message-broker record — confirm the source format
    partitionID: int
    offset: int
    timestamp: int
    compression: str
    isTransactional: bool
    headers: list
    key: dict
    value: ValueModel  # wraps the PayloadModel that carries the actual event
def _is_admin(page: str | None) -> bool:
return page is not None and page.startswith("/admin/")
class Loader:
    """Load recorded sessions from ``<src_dir>/<session>/int.json`` files.

    Only sub-directories that actually contain an ``int.json`` file count as
    sessions; stray files or incomplete directories are ignored instead of
    aborting the load.
    """

    def __init__(self, src_dir: str):
        """Discover the session directories under ``src_dir`` and load them.

        Raises:
            ValueError: when ``src_dir`` contains no session directory.
        """
        self.src_dir = src_dir
        # os.listdir also returns plain files and directories without recordings;
        # keep only real sessions, in a deterministic order.
        self.entries = sorted(
            entry for entry in os.listdir(src_dir)
            if os.path.isfile(os.path.join(src_dir, entry, "int.json"))
        )
        if not self.entries: raise ValueError("empty directory")
        self.data = self._load_sessions()

    def _load_sessions(self) -> dict:
        """Parse every session into InteractionModel objects, dropping admin-page events."""
        sessions = {}
        for entry in self.entries:
            with open(os.path.join(self.src_dir, entry, "int.json")) as f:
                raw = json.load(f)
            ints = [InteractionModel(**i) for i in raw]
            sessions[entry] = [i for i in ints if not _is_admin(i.value.payload.page)]
        return sessions

    def get_data(self) -> dict:
        """Return the mapping of session name to its list of events."""
        return self.data

    def get_entries(self) -> tuple[list[str], int]:
        """Return the session names and how many there are."""
        return self.entries, len(self.entries)
class AgentLoader(Loader):
    """Loader for agent sessions, whose ``int.json`` items are bare payloads."""

    def _load_sessions(self) -> dict:
        """Parse every session into PayloadModel objects, dropping admin-page events."""
        sessions = {}
        for session_id in self.entries:
            with open(f"{self.src_dir}/{session_id}/int.json") as handle:
                items = json.load(handle)
            kept = []
            for item in items:
                payload = PayloadModel(**item)
                kept.append(payload)
            sessions[session_id] = [p for p in kept if not _is_admin(p.page)]
        return sessions
class JointLoader:
    """Combine human and agent sessions into one mapping of payload lists."""

    def __init__(self, human_dir: str, agent_dir: str):
        self.human_loader = Loader(human_dir)
        self.agent_loader = AgentLoader(agent_dir)
        self.data = self._merge()
        self.entries = list(self.data.keys())

    def _merge(self) -> dict:
        """Prefix session ids with ``human_`` / ``agent_`` and unwrap human events to their payloads."""
        merged = {}
        for sid, evts in self.human_loader.get_data().items():
            merged[f"human_{sid}"] = [evt.value.payload for evt in evts]
        for sid, evts in self.agent_loader.get_data().items():
            merged[f"agent_{sid}"] = evts
        return merged

    def get_data(self) -> dict:
        """Return the merged mapping of prefixed session id to payload list."""
        return self.data

    def get_entries(self) -> tuple[list[str], int]:
        """Return the prefixed session ids and how many there are."""
        return self.entries, len(self.entries)
if __name__ == "__main__":
    # Manual check: build each loader against the author's local data and report session counts.
    agent_dir = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/agents/collected_data/"
    human_dir = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/collected_data/"
    factories = [
        ("agent", AgentLoader, agent_dir),
        ("human", Loader, human_dir),
        # the joint loader takes both directories; the agent one arrives as the argument
        ("joint", lambda d: JointLoader(human_dir, d), agent_dir),
    ]
    for name, factory, path in factories:
        ldr = factory(path)
        print(f"Loaded {len(ldr.get_entries()[0])} {name} sessions")

View File

@@ -0,0 +1,256 @@
try:
from loader import Loader, AgentLoader, JointLoader
except ImportError:
from sim.rl.behavior_loader.loader import Loader, AgentLoader, JointLoader
from collections import defaultdict
from typing import Dict, List, Tuple, Set
import numpy as np
import graphviz
import sys
from pathlib import Path
# import lib utilities for optional use - models keep their own _state_repr for backwards compat
# with the specific event structure (evt.value.payload)
sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent / 'lib'))
try:
from lib.state import make_state_repr as lib_make_state_repr
from lib.features import transition_histogram as lib_transition_histogram
except ImportError:
lib_make_state_repr = None
lib_transition_histogram = None
class BehaviorModel:
    """Empirical Markov model of browsing behavior built from recorded sessions.

    A state is the string ``page|productId|eventName`` of an event; transition
    probabilities are the normalized counts of consecutive states per session.
    """

    def __init__(self, src_dir: str, loader_cls=Loader):
        """Load sessions from ``src_dir`` with ``loader_cls``; the MDP is built on demand."""
        self.loader = loader_cls(src_dir)
        self.data = self.loader.get_data()
        self.entries, self.num_entries = self.loader.get_entries()
        self.mdp = None

    def _state_repr(self, evt) -> str:
        """Map an event to its state string; missing page / product become 'unk' / 'none'."""
        p = evt.value.payload
        return f"{p.page or 'unk'}|{p.productId or 'none'}|{p.eventName}"

    def _sort_key(self, evt):
        """Key used to order a session's events chronologically."""
        return evt.timestamp

    def _extract_sessions(self) -> List[List[str]]:
        """Return one time-ordered state sequence per session with at least two events."""
        trajs = []
        for evts in self.data.values():
            if len(evts) < 2: continue
            states = [self._state_repr(e) for e in sorted(evts, key=self._sort_key)]
            trajs.append(states)
        return trajs

    def _calc_transitions(self, trajs: List[List[str]]) -> Tuple[Dict, Set]:
        """Count consecutive state pairs; returns (counts[s][s_next], set of seen states)."""
        trans, states = defaultdict(lambda: defaultdict(int)), set()
        for traj in trajs:
            for s, s_next in zip(traj, traj[1:]):
                trans[s][s_next] += 1
                states.update([s, s_next])
        return trans, states

    def _calc_rewards(self, trajs: List[List[str]]) -> Dict:
        """Collect, per state, the relative positions (index / length) at which it occurs."""
        rwd = defaultdict(list)
        for traj in trajs:
            n = len(traj)
            for i, s in enumerate(traj):
                rwd[s].append(i / n)
        return rwd

    def _normalize_trans(self, cnts: Dict) -> Dict:
        """Turn per-state successor counts into probabilities that sum to 1 per state."""
        probs = {}
        for s, nxt in cnts.items():
            # the row total is the same for every successor, so compute it once
            total = sum(nxt.values())
            probs[s] = {s_n: cnt / total for s_n, cnt in nxt.items()}
        return probs

    def build_MDP(self) -> Dict:
        """Build, store and return the model dict.

        Keys: 'states', 'num_states', 'transitions', 'state_values',
        'state_rewards' and 'trans_counts'.
        """
        trajs = self._extract_sessions()
        trans_cnt, states = self._calc_transitions(trajs)
        trans_prob = self._normalize_trans(trans_cnt)
        state_rwd = self._calc_rewards(trajs)
        self.mdp = {
            'states': sorted(states),
            'num_states': len(states),
            'transitions': trans_prob,
            'state_values': {s: np.mean(r) for s, r in state_rwd.items()},
            'state_rewards': state_rwd,
            'trans_counts': trans_cnt,
        }
        return self.mdp

    def transition_prob(self, s: str, s_next: str) -> float:
        """Return P(s_next | s), or 0.0 for an unseen pair. Raises ValueError before build_MDP."""
        if not self.mdp: raise ValueError("build MDP first")
        return self.mdp['transitions'].get(s, {}).get(s_next, 0.0)

    def state_value(self, s: str) -> float:
        """Return the mean relative position of ``s``, or 0.0 if unseen. Raises ValueError before build_MDP."""
        if not self.mdp: raise ValueError("build MDP first")
        return self.mdp['state_values'].get(s, 0.0)

    def sample_traj(self, start: str, max_len: int = 50) -> List[str]:
        """Sample a state path from ``start``, stopping at a state without successors or after ``max_len`` steps."""
        if not self.mdp: raise ValueError("build MDP first")
        path, curr = [start], start
        for _ in range(max_len):
            nxt = self.mdp['transitions'].get(curr, {})
            if not nxt: break
            curr = np.random.choice(list(nxt.keys()), p=list(nxt.values()))
            path.append(curr)
        return path

    def extract_trajectory_features(self, events: List, max_trans_dim: int = 50) -> np.ndarray:
        """Convert trajectory to feature vector using MDP structure for contrastive learning

        Builds the MDP first if needed. The float32 vector holds, in order:
        ``max_trans_dim`` normalized transition counts, the state coverage
        ratio, a length-normalized transition entropy, the trajectory length,
        the number of unique states, and mean / std / min / max of the state
        values along the trajectory.
        """
        if not self.mdp:
            self.build_MDP()
        states = [self._state_repr(e) for e in sorted(events, key=self._sort_key)]
        features = []
        # transition histogram over MDP state space
        trans_counts = defaultdict(int)
        for s, s_next in zip(states, states[1:]):
            trans_counts[(s, s_next)] += 1
        all_trans = [(s, t) for s in self.mdp['states'] for t in self.mdp['transitions'].get(s, {}).keys()]
        trans_vec = [trans_counts.get(tr, 0) for tr in all_trans[:max_trans_dim]]
        trans_vec = trans_vec + [0] * (max_trans_dim - len(trans_vec))  # pad
        total_trans = sum(trans_counts.values()) or 1
        features.extend([v / total_trans for v in trans_vec])
        # state coverage ratio
        visited = set(states)
        features.append(len(visited) / max(self.mdp['num_states'], 1))
        # temporal entropy of transitions
        if len(states) > 1:
            trans_probs = [self.transition_prob(s, s_n) for s, s_n in zip(states, states[1:])]
            entropy = -sum(p * np.log(p + 1e-10) for p in trans_probs if p > 0)
            features.append(entropy / max(len(states), 1))
        else:
            features.append(0.0)
        # trajectory length and unique state count
        features.append(len(states))
        features.append(len(visited))
        # state value statistics along trajectory
        vals = [self.state_value(s) for s in states]
        if vals:
            features.extend([np.mean(vals), np.std(vals), np.min(vals), np.max(vals)])
        else:
            features.extend([0.0, 0.0, 0.0, 0.0])
        return np.array(features, dtype=np.float32)
class AgentBehaviorModel(BehaviorModel):
    """BehaviorModel over agent sessions, whose events are bare payloads loaded by AgentLoader."""
    def __init__(self, src_dir: str):
        super().__init__(src_dir, AgentLoader)
    def _state_repr(self, evt) -> str:
        # agent events expose page / productId / eventName directly (no .value.payload wrapper)
        return f"{evt.page or 'unk'}|{evt.productId or 'none'}|{evt.eventName}"
    def _sort_key(self, evt):
        # payloads are ordered by their `ts` field
        return evt.ts
class JointBehaviorModel(BehaviorModel):
    """BehaviorModel over the merged human + agent sessions provided by JointLoader."""
    def __init__(self, human_dir: str, agent_dir: str):
        # the parent constructor is not called: it builds a loader from a single
        # directory, whereas JointLoader needs both, so the attributes are set here
        self.loader = JointLoader(human_dir, agent_dir)
        self.data = self.loader.get_data()
        self.entries, self.num_entries = self.loader.get_entries()
        self.mdp = None
    def _state_repr(self, evt) -> str:
        # merged events expose page / productId / eventName directly
        return f"{evt.page or 'unk'}|{evt.productId or 'none'}|{evt.eventName}"
    def _sort_key(self, evt):
        # payloads are ordered by their `ts` field
        return evt.ts
def aggregate_event_transitions(mdp: Dict) -> Dict[str, Dict[str, float]]:
    """Collapse state-level transitions to event-name level and renormalize each row.

    The event name is the third ``|``-separated field of a state string.
    Probabilities of all states sharing a source event are summed per
    destination event, then each source row is divided by its total (rows
    whose total is not positive are left as accumulated).
    """
    summed = defaultdict(lambda: defaultdict(float))
    for state, successors in mdp['transitions'].items():
        src_evt = state.split('|')[2]
        for nxt_state, prob in successors.items():
            dst_evt = nxt_state.split('|')[2]
            summed[src_evt][dst_evt] += prob

    result = {}
    for src_evt, row in summed.items():
        mass = sum(row.values())
        if mass > 0:
            result[src_evt] = {dst_evt: p / mass for dst_evt, p in row.items()}
        else:
            result[src_evt] = row
    return result
def visualize_mdp(model: BehaviorModel, threshold: float = 0.05, output: str = "mdp_graph",
                  fmt: str = "svg", view: bool = False, export_dot: bool = False):
    """Render the event-level transition graph of a built model with graphviz.

    Args:
        model: model whose ``mdp`` has been built.
        threshold: only transitions with probability above this value get an edge.
        output: output path without extension.
        fmt: graphviz output format.
        view: forwarded to ``Digraph.render``.
        export_dot: also write the DOT source to ``<output>.dot``.

    Returns:
        The graphviz Digraph.

    Raises:
        ValueError: when the model's MDP has not been built.
    """
    if not model.mdp:
        raise ValueError("build MDP first")
    evt_trans = aggregate_event_transitions(model.mdp)

    graph = graphviz.Digraph(format=fmt)
    graph.attr(rankdir='LR', size='30')
    graph.attr('node', shape='circle', width='1', height='1')

    # every event that appears as a source or a destination becomes a node
    events = set(evt_trans.keys()) | {e for trans in evt_trans.values() for e in trans.keys()}
    for event_name in events:
        graph.node(event_name)

    for src, dsts in evt_trans.items():
        for dst, prob in dsts.items():
            if not prob > threshold:
                continue
            graph.edge(src, dst, label=f'{prob:.2f}')

    graph.render(output, view=view, cleanup=True)
    print(f"Saved MDP graph to {output}.{fmt}")
    if export_dot:
        with open(f"{output}.dot", 'w') as f:
            f.write(graph.source)
        print(f"Exported DOT source to {output}.dot")
    return graph
def kl_divergence(p: Dict[str, float], q: Dict[str, float]) -> float:
    """Smoothed KL divergence of ``p`` from ``q`` over the keys of ``p``.

    Every probability is offset by 1e-10 so that keys missing from ``q`` (or
    zero entries) do not produce a division by zero or log(0).
    """
    eps = 1e-10
    # sum over keys of P of p * log(p / q), with both terms smoothed by eps
    total = 0
    for key, p_k in p.items():
        p_s = p_k + eps
        q_s = q.get(key, 0.0) + eps
        total = total + p_s * np.log(p_s / q_s)
    return total
if __name__ == "__main__":
    # Manual analysis script: builds human, agent and joint models from the author's
    # local data, renders each graph, and compares human vs agent event transitions.
    base_dir = "/home/velocitatem/Documents/Projects/PHANTOM/experiments"
    human_dir, agent_dir = f"{base_dir}/collected_data/", f"{base_dir}/agents/collected_data/"
    # --- human model ---
    human_model = BehaviorModel(human_dir)
    human_mdp = human_model.build_MDP()
    print(f"Built MDP: {human_mdp['num_states']} states, "
          f"{sum(len(t) for t in human_mdp['transitions'].values())} transitions")
    if not human_mdp['states']:
        exit("No states found")
    visualize_mdp(human_model, threshold=0.05, output="human_mdp_viz", fmt="pdf", export_dot=True)
    # --- agent model ---
    agent_model = AgentBehaviorModel(agent_dir)
    agent_mdp = agent_model.build_MDP()
    print(f"AGENT... Built MDP: {agent_mdp['num_states']} states, "
          f"{sum(len(t) for t in agent_mdp['transitions'].values())} transitions")
    if not agent_mdp['states']:
        exit("No states found")
    visualize_mdp(agent_model, threshold=0.05, output="agent_mdp_viz", fmt="pdf", export_dot=True)
    # --- per-event KL divergence, human relative to agent, over shared source events ---
    human_evt = aggregate_event_transitions(human_mdp)
    agent_evt = aggregate_event_transitions(agent_mdp)
    common = set(human_evt.keys()) & set(agent_evt.keys())
    if not common:
        exit("No common event types for KL divergence analysis")
    # largest divergence first
    kl_divs = sorted([(e, kl_divergence(human_evt[e], agent_evt[e])) for e in common],
                     key=lambda x: x[1], reverse=True)
    print(f"Average KL divergence: {np.mean([kl for _, kl in kl_divs]):.4f}")
    print("\nMost divergent event types:")
    for evt, kl in kl_divs:
        print(f"  {evt}: {kl:.4f}")
    # --- joint model ---
    print("\n=== Joint Model (Human + Agent Combined) ===")
    joint_model = JointBehaviorModel(human_dir, agent_dir)
    joint_mdp = joint_model.build_MDP()
    print(f"Built joint MDP: {joint_mdp['num_states']} states, "
          f"{sum(len(t) for t in joint_mdp['transitions'].values())} transitions")
    if joint_mdp['states']:
        visualize_mdp(joint_model, threshold=0.05, output="joint_mdp_viz", fmt="pdf", export_dot=True)

240
sim/rl/engine.py Normal file
View File

@@ -0,0 +1,240 @@
from os import kill
import numpy as np
import pandas as pd
from abc import ABC, abstractmethod
from typing import Dict, Any
from sim.rl.environment import BusinessLogicConstraints
"""
An engine by default should have its own demand-estimation mechanism, driven by the observations it receives, which are the computed features.
From these features we then follow the research structure q -> p, using a mechanism that must be both testable and updatable.
"""
class BasePricingEngine(ABC):
    """Base interface for all pricing engines.

    Holds the business constraints (``self.c``), a seeded random generator
    (``self.rng``) and a step counter that ``reset`` sets back to zero.
    """
    def __init__(self, constraints: BusinessLogicConstraints, seed: int = 0):
        self.c = constraints
        self.rng = np.random.default_rng(seed)
        self.step_count = 0
    @abstractmethod
    def compute_prices(self, current_prices: np.ndarray, observation: Dict[str, Any]) -> np.ndarray:
        """Compute new prices given the current state and an observation from the environment.

        Args:
            current_prices: current price vector [N]
            observation: dict containing 'price', 'demand', and possibly interaction data

        Returns:
            new_prices: updated price vector [N]
        """
        pass
    def update(self, observation: Dict[str, Any], reward: float, done: bool, info: Dict[str, Any]) -> None:
        """Default update: remember the latest observation, reward and info.

        ``done`` is ignored here. Engines can override as needed.
        """
        self.last_observation = observation
        self.last_reward = reward
        self.last_info = info
    def reset(self):
        """Reset engine state for a new episode (only the step counter at this level)."""
        self.step_count = 0
class WildPricingEngine(BasePricingEngine):
    """Production-like pricing using online elasticity estimation via EWMA regression.

    Each step updates exponentially weighted moments of log-price and
    log-demand per product, derives an elasticity estimate from them, and moves
    prices a fraction ``lr`` of the way toward the isoelastic profit optimum,
    subject to the business guardrails in the constraints object.
    """

    def __init__(self, constraints: BusinessLogicConstraints, seed: int = 0):
        super().__init__(constraints, seed)
        # per-product unit costs (unknown to customers; known to platform)
        self.unit_cost = self.rng.uniform(8.0, 40.0, size=self.c.product_catalogue_size).astype(np.float32)
        self._init_estimator_state()
        # knobs typical in production
        self.lr = 0.08
        self.ewma = 0.05
        self.eps_explore = 0.03
        self.explore_scale = 0.03

    def _init_estimator_state(self) -> None:
        """(Re)initialize the elasticity estimate and the EWMA regression moments."""
        n = self.c.product_catalogue_size
        # online elasticity estimate (start moderately elastic)
        self.e_hat = np.full((n,), -1.3, dtype=np.float32)
        # EWMA state for log-log regression
        self.mu_logp = np.zeros(n, dtype=np.float32)
        self.mu_logq = np.zeros(n, dtype=np.float32)
        self.cov_pq = np.zeros(n, dtype=np.float32)
        self.var_p = np.ones(n, dtype=np.float32)

    def _safe_elasticity(self, e: np.ndarray) -> np.ndarray:
        """Clamp elasticities to [-5.0, -1.05] so that e / (e + 1) stays finite and positive."""
        return np.clip(e, -5.0, -1.05)

    def reset(self):
        """Reset the step counter and the estimator state; unit costs and knobs are kept."""
        super().reset()
        self._init_estimator_state()

    def compute_prices(self, current_prices: np.ndarray, observation: Dict[str, Any]) -> np.ndarray:
        """Return updated prices; a missing 'demand' entry is treated as zero demand."""
        self.step_count += 1
        # extract demand signal (from env observation) as proxy for sales
        demand = observation.get('demand')
        if demand is None:
            demand = np.zeros(self.c.product_catalogue_size, dtype=np.float32)
        return self._update_from_demand(current_prices, demand)

    def _update_from_demand(self, prices: np.ndarray, sold: np.ndarray) -> np.ndarray:
        """Update the elasticity estimate from (prices, sold) and return the guarded new prices."""
        # log transforms (add 1 to handle zeros)
        logp = np.log(np.clip(prices, 1e-3, None)).astype(np.float32)
        logq = np.log(sold + 1.0).astype(np.float32)
        # EWMA moments for per-product regression: logq ≈ a + e*logp
        a = self.ewma
        # deviations are taken against the means from before this step's update
        dp = logp - self.mu_logp
        dq = logq - self.mu_logq
        self.mu_logp = (1 - a) * self.mu_logp + a * logp
        self.mu_logq = (1 - a) * self.mu_logq + a * logq
        self.cov_pq = (1 - a) * self.cov_pq + a * (dp * dq)
        self.var_p = (1 - a) * self.var_p + a * (dp * dp + 1e-6)
        e_new = self.cov_pq / (self.var_p + 1e-6)
        self.e_hat = self._safe_elasticity(0.9 * self.e_hat + 0.1 * e_new)
        # profit-optimal price for isoelastic demand (if e < -1)
        e = self.e_hat
        p_star = self.unit_cost * (e / (e + 1.0))
        # smooth toward p_star
        new_prices = (1 - self.lr) * prices + self.lr * p_star
        # exploration (small random perturbations)
        if self.rng.random() < self.eps_explore:
            noise = self.rng.normal(0.0, self.explore_scale, size=new_prices.shape).astype(np.float32)
            new_prices = new_prices * (1.0 + noise)
        # apply business guardrails (max change + bounds)
        max_adj = self.c.max_price_adjustment
        ratio = np.clip(new_prices / (prices + 1e-6), 1 - max_adj, 1 + max_adj)
        new_prices = prices * ratio
        new_prices = np.clip(new_prices, self.c.system_min_price, self.c.system_max_price).astype(np.float32)
        return new_prices
class StaticPricingEngine(BasePricingEngine):
    """Baseline engine: freezes the first prices it sees and returns them every step."""

    def __init__(self, constraints: BusinessLogicConstraints, seed: int = 0):
        super().__init__(constraints, seed)
        # Captured lazily on the first compute_prices() call.
        self.fixed_prices = None

    def reset(self):
        """Forget the frozen prices so the next episode captures new ones."""
        super().reset()
        self.fixed_prices = None

    def compute_prices(self, current_prices: np.ndarray, observation: Dict[str, Any]) -> np.ndarray:
        """Return a copy of the frozen prices; ``observation`` is ignored."""
        self.step_count += 1
        frozen = self.fixed_prices
        if frozen is None:
            # First call of the episode: snapshot the incoming prices.
            frozen = current_prices.copy()
            self.fixed_prices = frozen
        # Hand out a copy so callers cannot mutate the stored snapshot.
        return frozen.copy()
class SimpleDemandEngine(BasePricingEngine):
    """Demand-following rule: nudge prices up when demand rose, down when it fell."""

    def __init__(self, constraints: BusinessLogicConstraints, seed: int = 0):
        super().__init__(constraints, seed)
        self.prev_demand = None
        # Scale applied to the relative demand change.
        self.lr = 0.05

    def reset(self):
        """Drop the remembered demand so the next step only records a baseline."""
        super().reset()
        self.prev_demand = None

    def compute_prices(self, current_prices: np.ndarray, observation: Dict[str, Any]) -> np.ndarray:
        """Adjust prices in proportion to the step-over-step demand change.

        The first call after a reset only stores the demand and returns the
        prices unchanged, because there is nothing to compare against yet.
        """
        self.step_count += 1
        demand = _extract_demand(observation, self.c.product_catalogue_size)

        previous = self.prev_demand
        if previous is None:
            self.prev_demand = demand.copy()
            return current_prices.copy()

        # Signed demand change relative to (|previous demand| + 1), scaled by lr.
        change = demand - previous
        price_adj = self.lr * np.sign(change) * np.abs(change) / (np.abs(previous) + 1.0)
        proposed = current_prices * (1.0 + price_adj)
        self.prev_demand = demand.copy()

        # Guardrails: cap the relative move per step, then clamp to system bounds.
        cap = self.c.max_price_adjustment
        step_ratio = np.clip(proposed / (current_prices + 1e-6), 1 - cap, 1 + cap)
        limited = current_prices * step_ratio
        bounded = np.clip(limited, self.c.system_min_price, self.c.system_max_price)
        return bounded.astype(np.float32)
class RandomWalkEngine(BasePricingEngine):
    """Random-walk pricing with a weak pull back toward the episode's first prices."""

    def __init__(self, constraints: BusinessLogicConstraints, seed: int = 0):
        super().__init__(constraints, seed)
        self.target_price = None
        # Standard deviation of the multiplicative per-step noise.
        self.volatility = 0.02

    def reset(self):
        """Clear the reversion anchor; it is re-captured on the next step."""
        super().reset()
        self.target_price = None

    def compute_prices(self, current_prices: np.ndarray, observation: Dict[str, Any]) -> np.ndarray:
        """Apply one noisy step plus mean reversion; ``observation`` is ignored."""
        self.step_count += 1

        # The first prices seen after a reset become the reversion anchor.
        anchor = self.target_price
        if anchor is None:
            anchor = current_prices.copy()
            self.target_price = anchor

        shock = self.rng.normal(0.0, self.volatility, size=current_prices.shape).astype(np.float32)
        pull = 0.01 * (anchor - current_prices)
        proposed = current_prices * (1.0 + shock) + pull

        # Guardrails: cap the relative move per step, then clamp to system bounds.
        cap = self.c.max_price_adjustment
        step_ratio = np.clip(proposed / (current_prices + 1e-6), 1 - cap, 1 + cap)
        limited = current_prices * step_ratio
        bounded = np.clip(limited, self.c.system_min_price, self.c.system_max_price)
        return bounded.astype(np.float32)
class ThompsonSamplingEngine(BasePricingEngine):
    """Per-product Beta bandit that treats each price level as a discrete arm."""

    def __init__(self, constraints: BusinessLogicConstraints, seed: int = 0):
        super().__init__(constraints, seed)
        self.n_price_levels = 5
        shape = (self.c.product_catalogue_size, self.n_price_levels)
        # Beta(1, 1) prior for every (product, price level) pair.
        self.alpha = np.ones(shape, dtype=np.float32)
        self.beta = np.ones(shape, dtype=np.float32)
        self.price_grid = None
        self.last_actions = None

    def reset(self):
        """Restore the uniform priors and forget the price grid and last arms."""
        super().reset()
        shape = (self.c.product_catalogue_size, self.n_price_levels)
        self.alpha = np.ones(shape, dtype=np.float32)
        self.beta = np.ones(shape, dtype=np.float32)
        self.price_grid = None
        self.last_actions = None

    def compute_prices(self, current_prices: np.ndarray, observation: Dict[str, Any]) -> np.ndarray:
        """Update the posteriors with the observed demand, then sample new arms.

        Returns:
            float32 prices picked from the per-product grid, clamped to the
            system price bounds.
        """
        self.step_count += 1
        n_products = self.c.product_catalogue_size

        if self.price_grid is None:
            # Grid of n_price_levels points spanning 70%..130% of the first
            # prices seen; transposed so rows index products.
            low = current_prices * 0.7
            high = current_prices * 1.3
            self.price_grid = np.linspace(low, high, self.n_price_levels).T

        demand = _extract_demand(observation, n_products)

        # Credit the arm played last step with the demand observed now.
        if self.last_actions is not None:
            for product in range(n_products):
                arm = self.last_actions[product]
                observed = demand[product]
                if observed > 0.5:
                    # Demand above 0.5 counts as success, weighted by its size.
                    self.alpha[product, arm] += observed
                else:
                    self.beta[product, arm] += 1.0

        # Thompson step: draw one sample per arm and play the argmax.
        chosen_prices = np.zeros(n_products, dtype=np.float32)
        chosen_arms = np.zeros(n_products, dtype=int)
        for product in range(n_products):
            draws = self.rng.beta(self.alpha[product], self.beta[product]).astype(np.float32)
            best = int(np.argmax(draws))
            chosen_arms[product] = best
            chosen_prices[product] = self.price_grid[product, best]

        self.last_actions = chosen_arms
        bounded = np.clip(chosen_prices, self.c.system_min_price, self.c.system_max_price)
        return bounded.astype(np.float32)
def _extract_demand(observation: Dict[str, Any], n: int) -> np.ndarray:
if "elasticity" in observation and isinstance(observation["elasticity"], dict):
d = observation["elasticity"].get("demand")
if d is not None:
return np.asarray(d, dtype=np.float32)
d = observation.get("demand")
if d is not None:
return np.asarray(d, dtype=np.float32)
return np.zeros(n, dtype=np.float32)

View File

@@ -1,451 +1,244 @@
import gymnasium as gym from __future__ import annotations
from gymnasium import spaces
import numpy as np
from dataclasses import dataclass from dataclasses import dataclass
import pandas as pd from typing import Any, Dict, Optional, Tuple
from typing import Callable, Optional, Dict, Any, List
# "learner" agent learning to optimize pricing import numpy as np
# "agent" part of environment creating demand signals that learner processes
try:
import gymnasium as gym
from gymnasium import spaces
except ImportError as e:
raise ImportError("sim.rl.environment requires gymnasium") from e
from sim.case.thesis_simplified.coi import COIWindow, coi_erosion, compute_coi_window
from sim.case.thesis_simplified.separability import estimate_alpha as estimate_session_alpha
from sim.case.thesis_simplified.simplified import Limbo, Session, put_prices_to_market
from sim.rl.thesis_core import aggregate_demand_by_product, aggregate_purchases, constrain_prices
@dataclass(frozen=True)
class BusinessLogicConstraints:
product_catalogue_size: int = 100
max_steps: int = 2000
sessions_per_step: int = 250
@dataclass
class BusinessLogicConstraints():
max_price_adjustment: float = 0.30
system_max_price: float = 500.0 system_max_price: float = 500.0
system_min_price: float = 1.0 system_min_price: float = 1.0
product_catelogue_size: int = 100 max_price_adjustment: float = 0.30
episode_length: int = 200 min_margin_pct: float = 0.05
sessions_per_step: int = 250
agent_share: float = 0.25 agent_share: float = 0.2
agent_recon_multiplier: float = 6.0 alpha_drift: float = 0.0
agent_purchase_probability: float = 0.20 alpha_bounds: tuple[float, float] = (0.0, 0.8)
coi_strength: float = 0.25 coi_strength: float = 0.25
coi_threshold: float = 4.0
coi_sigmoid_temp: float = 1.25
base_human_demand: float = 0.08
base_agent_demand: float = 0.05
human_price_elasticity: float = -1.2
agent_price_elasticity: float = -0.6
w_agent_loss: float = 1.0
w_volatility: float = 5.0 w_volatility: float = 5.0
w_estimation_error: float = 0.25 w_estimation_error: float = 0.25
seed: int = 7 seed: int = 7
def _sigmoid(x: np.ndarray) -> np.ndarray: def make_env(constraints: Optional[BusinessLogicConstraints] = None) -> "PHANTOMEnv":
return 1.0 / (1.0 + np.exp(-x)) return PHANTOMEnv(constraints=constraints or BusinessLogicConstraints())
def simple_agent_detector(session_df: pd.DataFrame) -> pd.Series:
# baseline heuristic: high velocity + low conversion
v = session_df.get("interaction_velocity", pd.Series(0.0, index=session_df.index))
cr = session_df.get("conversion_rate", pd.Series(0.0, index=session_df.index))
total = session_df.get("total_interactions", pd.Series(0, index=session_df.index))
return (total >= 12) & (v >= 0.20) & (cr <= 0.01)
class CommercePlatform:
def __init__(self, product_catelogue_size: int, max_price: float, min_price: float,
constraints: BusinessLogicConstraints, agent_detector: Optional[Callable[[pd.DataFrame], pd.Series]] = None,
use_defense: bool = False):
self.product_catelogue_size = product_catelogue_size
self.max_price = max_price
self.min_price = min_price
self.constraints = constraints
self.use_defense = use_defense
self.agent_detector = agent_detector
self.simulation_history: List[Dict[str, Any]] = []
self._rng = np.random.default_rng(constraints.seed)
self._popularity = self._rng.lognormal(mean=0.0, sigma=0.6, size=self.product_catelogue_size)
self._popularity = self._popularity / (self._popularity.mean() + 1e-12)
self._last_interaction_df: pd.DataFrame = pd.DataFrame()
def setup_true_demand(self, prices: np.ndarray) -> Dict[str, np.ndarray]:
# ground truth purchase propensities
p = np.clip(prices, self.min_price, self.max_price)
pn = p / self.max_price
human_prob = self.constraints.base_human_demand * (pn ** self.constraints.human_price_elasticity)
agent_prob = self.constraints.base_agent_demand * (pn ** self.constraints.agent_price_elasticity)
return {
"human_purchase_prob": np.clip(human_prob * self._popularity, 0.0, 0.95),
"agent_purchase_prob": np.clip(agent_prob * self._popularity, 0.0, 0.95)
}
def _session_markup_multiplier(self, signal_score: float) -> float:
# session-based COI markup based on demand signal expression
x = (signal_score - self.constraints.coi_threshold) / max(self.constraints.coi_sigmoid_temp, 1e-6)
return 1.0 + self.constraints.coi_strength * float(_sigmoid(np.array([x]))[0])
def _simulate_sessions(self, base_prices: np.ndarray) -> pd.DataFrame:
demand = self.setup_true_demand(base_prices)
human_pprob = demand["human_purchase_prob"]
agent_pprob = demand["agent_purchase_prob"]
events: List[Dict[str, Any]] = []
T = self.constraints.sessions_per_step
n_agent_sessions = int(round(T * self.constraints.agent_share))
n_human_sessions = T - n_agent_sessions
# human sessions: normal browse with possible purchase
for s in range(n_human_sessions):
session_id = f"h_{len(events)}_{s}"
k = int(self._rng.integers(1, 4))
prod_ids = self._rng.choice(self.product_catelogue_size, size=k, replace=False)
t = 0.0
inter_times = self._rng.gamma(shape=2.0, scale=3.0, size=3 * k)
signal_score = 0.0
purchased_any = False
for i, pid in enumerate(prod_ids):
t += float(inter_times[i])
price_shown = float(base_prices[pid])
events.append({
"session_id": session_id, "actor": "human", "agent_id": None, "product_id": int(pid),
"action": "view", "t": t, "price_shown": price_shown, "is_purchase": 0,
"price_paid": 0.0, "oracle_price_paid": 0.0, "signal_score": 0.0,
})
signal_score += 1.0
if self._rng.random() < 0.35:
t += float(inter_times[i + k])
events.append({
"session_id": session_id, "actor": "human", "agent_id": None, "product_id": int(pid),
"action": "cart", "t": t, "price_shown": price_shown, "is_purchase": 0,
"price_paid": 0.0, "oracle_price_paid": 0.0, "signal_score": 0.0,
})
signal_score += 2.0
if (not purchased_any) and (self._rng.random() < float(human_pprob[pid])):
t += float(inter_times[i + 2 * k])
mult = self._session_markup_multiplier(signal_score)
price_paid = float(np.clip(base_prices[pid] * mult, self.min_price, self.max_price))
events.append({
"session_id": session_id, "actor": "human", "agent_id": None, "product_id": int(pid),
"action": "purchase", "t": t, "price_shown": float(base_prices[pid]), "is_purchase": 1,
"price_paid": price_paid, "oracle_price_paid": price_paid, "signal_score": signal_score,
})
purchased_any = True
# agent sessions: split recon/purchase to circumvent COI
n_agent_ids = max(1, n_agent_sessions // 2)
for a in range(n_agent_ids):
agent_id = f"a_{a}"
recon_session_id = f"{agent_id}_recon"
t = 0.0
n_views = int(self._rng.poisson(lam=8) * self.constraints.agent_recon_multiplier) + 5
inter_times = self._rng.gamma(shape=2.0, scale=0.6, size=max(n_views, 1))
prod_ids = self._rng.integers(0, self.product_catelogue_size, size=n_views)
recon_signal = 0.0
for i, pid in enumerate(prod_ids):
t += float(inter_times[i])
events.append({
"session_id": recon_session_id, "actor": "agent", "agent_id": agent_id, "product_id": int(pid),
"action": "view", "t": t, "price_shown": float(base_prices[pid]), "is_purchase": 0,
"price_paid": 0.0, "oracle_price_paid": 0.0, "signal_score": 0.0,
})
recon_signal += 1.0
# clean purchase session with minimal interactions
if self._rng.random() < self.constraints.agent_purchase_probability:
purchase_session_id = f"{agent_id}_clean"
pid = int(self._rng.integers(0, self.product_catelogue_size))
t2 = 0.0
clean_signal = 0.0
t2 += float(self._rng.gamma(shape=2.0, scale=0.7))
events.append({
"session_id": purchase_session_id, "actor": "agent", "agent_id": agent_id, "product_id": pid,
"action": "view", "t": t2, "price_shown": float(base_prices[pid]), "is_purchase": 0,
"price_paid": 0.0, "oracle_price_paid": 0.0, "signal_score": 0.0,
})
clean_signal += 1.0
if self._rng.random() < float(agent_pprob[pid]):
t2 += float(self._rng.gamma(shape=2.0, scale=0.7))
obs_mult = self._session_markup_multiplier(clean_signal)
obs_paid = float(np.clip(base_prices[pid] * obs_mult, self.min_price, self.max_price))
oracle_mult = self._session_markup_multiplier(recon_signal) # oracle links recon->purchase
oracle_paid = float(np.clip(base_prices[pid] * oracle_mult, self.min_price, self.max_price))
events.append({
"session_id": purchase_session_id, "actor": "agent", "agent_id": agent_id, "product_id": pid,
"action": "purchase", "t": t2, "price_shown": float(base_prices[pid]), "is_purchase": 1,
"price_paid": obs_paid, "oracle_price_paid": oracle_paid, "signal_score": clean_signal,
})
return pd.DataFrame(events)
def compute_interaction_features(self, interaction_df: pd.DataFrame) -> Dict[str, float]:
if interaction_df.empty:
return {"mean_sale_price": 0.0, "look_to_book": 0.0}
purchases = interaction_df[interaction_df["action"] == "purchase"]
mean_sale_price = float(purchases["price_paid"].mean()) if not purchases.empty else 0.0
views = float((interaction_df["action"] == "view").sum())
buys = float((interaction_df["action"] == "purchase").sum())
return {"mean_sale_price": mean_sale_price, "look_to_book": float(views / (buys + 1e-6))}
def _session_feature_table(self, df: pd.DataFrame) -> pd.DataFrame:
if df.empty:
return pd.DataFrame()
g = df.groupby("session_id", sort=False)
session_duration = g["t"].max() - g["t"].min()
total_interactions = g.size()
avg_time_between = g["t"].apply(lambda x: float(np.diff(np.sort(x.to_numpy())).mean()) if len(x) > 1 else 0.0)
interaction_velocity = total_interactions / (session_duration + 1e-6)
views = g.apply(lambda x: int((x["action"] == "view").sum()), include_groups=False)
cart_adds = g.apply(lambda x: int((x["action"] == "cart").sum()), include_groups=False)
purchases = g.apply(lambda x: int((x["action"] == "purchase").sum()), include_groups=False)
conversion_rate = purchases / (views + 1e-6)
is_agent = g["actor"].apply(lambda s: bool((s == "agent").any()), include_groups=False)
return pd.DataFrame({
"session_duration_sec": session_duration.astype(float),
"avg_time_between_events": avg_time_between.astype(float),
"total_interactions": total_interactions.astype(int),
"interaction_velocity": interaction_velocity.astype(float),
"item_views": views.astype(int),
"cart_adds": cart_adds.astype(int),
"purchases": purchases.astype(int),
"conversion_rate": conversion_rate.astype(float),
"is_agent": is_agent.astype(bool),
}).reset_index()
def demand_estimate(self, interaction_df: pd.DataFrame, exclude_sessions: Optional[pd.Series] = None) -> np.ndarray:
# proxy demand from weighted interaction events
if interaction_df.empty:
return np.zeros(self.product_catelogue_size, dtype=np.float32)
df = interaction_df
if exclude_sessions is not None:
bad_sessions = set(exclude_sessions.loc[exclude_sessions].index)
df = df[~df["session_id"].isin(bad_sessions)]
weights = {"view": 0.15, "cart": 0.75, "purchase": 2.5}
w = df["action"].map(weights).fillna(0.0).to_numpy(dtype=float)
prod = df["product_id"].to_numpy(dtype=int)
q_hat = np.zeros(self.product_catelogue_size, dtype=float)
np.add.at(q_hat, prod, w)
return q_hat.astype(np.float32)
def run_pricing_simulation(self, prices: np.ndarray) -> Dict[str, Any]:
interaction_df = self._simulate_sessions(prices)
self._last_interaction_df = interaction_df
session_df = self._session_feature_table(interaction_df)
predicted_agent_sessions = None
if (self.use_defense and self.agent_detector is not None and not session_df.empty):
predicted_agent_sessions = self.agent_detector(session_df.set_index("session_id"))
q_hat_naive = self.demand_estimate(interaction_df, exclude_sessions=None)
q_hat_defended = self.demand_estimate(interaction_df, exclude_sessions=predicted_agent_sessions) \
if predicted_agent_sessions is not None else q_hat_naive.copy()
true_human = np.zeros(self.product_catelogue_size, dtype=float)
true_agent = np.zeros(self.product_catelogue_size, dtype=float)
if not interaction_df.empty:
purchases = interaction_df[interaction_df["action"] == "purchase"]
if not purchases.empty:
for _, r in purchases.iterrows():
if r["actor"] == "human":
true_human[int(r["product_id"])] += 1.0
else:
true_agent[int(r["product_id"])] += 1.0
revenue_observed = float(interaction_df["price_paid"].sum()) if not interaction_df.empty else 0.0
revenue_oracle = float(interaction_df["oracle_price_paid"].sum()) if not interaction_df.empty else 0.0
agent_loss = max(0.0, revenue_oracle - revenue_observed)
eps = 1e-6
internal_error_naive = np.abs(true_human - q_hat_naive) / (true_human + eps)
internal_error_def = np.abs(true_human - q_hat_defended) / (true_human + eps)
interaction_features = self.compute_interaction_features(interaction_df)
summary = {
"prices": prices.copy(),
"interaction_df": interaction_df,
"session_df": session_df,
"q_hat_naive": q_hat_naive,
"q_hat_defended": q_hat_defended,
"true_human_demand": true_human.astype(np.float32),
"true_agent_purchases": true_agent.astype(np.float32),
"internal_error_naive": internal_error_naive.astype(np.float32),
"internal_error_defended": internal_error_def.astype(np.float32),
"interaction_features": interaction_features,
"revenue_observed": revenue_observed,
"revenue_oracle": revenue_oracle,
"agent_loss": agent_loss,
"predicted_agent_sessions": predicted_agent_sessions,
}
self.simulation_history.append(summary)
return summary
def get_interaction_data(self) -> np.ndarray:
if self._last_interaction_df.empty:
return np.array([], dtype=object)
return self._last_interaction_df.to_dict(orient="records")
class PHANTOMEnv(gym.Env): class PHANTOMEnv(gym.Env):
metadata = {"render_modes": []} metadata = {"render_modes": ["human", "ansi"]}
def __init__(self, use_defense: bool = False): def __init__(self, constraints: Optional[BusinessLogicConstraints] = None):
super().__init__() super().__init__()
self.constraints = BusinessLogicConstraints() self.c = constraints or BusinessLogicConstraints()
self.action_space = spaces.Box(low=-self.constraints.max_price_adjustment, self.n = int(self.c.product_catalogue_size)
high=self.constraints.max_price_adjustment,
shape=(self.constraints.product_catelogue_size,), dtype=np.float32) self._rng = np.random.default_rng(self.c.seed)
self.observation_space = spaces.Dict({ self._t = 0
"elasticity": spaces.Dict({ self._alpha_true = float(self.c.agent_share)
"price": spaces.Box( self._alpha_hat = float(self.c.agent_share)
low=np.full((self.constraints.product_catelogue_size,), self.constraints.system_min_price, dtype=np.float32), self._costs = np.zeros(self.n, dtype=np.float32)
high=np.full((self.constraints.product_catelogue_size,), self.constraints.system_max_price, dtype=np.float32), self._refs = np.zeros(self.n, dtype=np.float32)
dtype=np.float32), self._prices: Optional[np.ndarray] = None
"demand": spaces.Box( self._last_sessions: list[Session] = []
low=np.zeros((self.constraints.product_catelogue_size,), dtype=np.float32), self._last_coi: COIWindow | None = None
high=np.full((self.constraints.product_catelogue_size,), 1e6, dtype=np.float32), self._limbo = Limbo()
dtype=np.float32),
}) self.action_space = spaces.Box(
}) low=np.full((self.n,), self.c.system_min_price, dtype=np.float32),
self.commerce_platform = CommercePlatform( high=np.full((self.n,), self.c.system_max_price, dtype=np.float32),
product_catelogue_size=self.constraints.product_catelogue_size, dtype=np.float32,
max_price=self.constraints.system_max_price, )
min_price=self.constraints.system_min_price, self.observation_space = spaces.Dict(
constraints=self.constraints, {
agent_detector=simple_agent_detector, "elasticity": spaces.Dict(
use_defense=use_defense) {
self._rng = np.random.default_rng(self.constraints.seed) "price": spaces.Box(
self.t = 0 low=np.full((self.n,), self.c.system_min_price, dtype=np.float32),
self._prev_prices: Optional[np.ndarray] = None high=np.full((self.n,), self.c.system_max_price, dtype=np.float32),
self.state: Dict[str, Any] = {} dtype=np.float32,
),
"demand": spaces.Box(
low=np.zeros((self.n,), dtype=np.float32),
high=np.full((self.n,), 1e9, dtype=np.float32),
dtype=np.float32,
),
}
),
"market": spaces.Dict(
{
"alpha_hat": spaces.Box(low=0.0, high=1.0, shape=(1,), dtype=np.float32),
"revenue_rate": spaces.Box(low=0.0, high=1e12, shape=(1,), dtype=np.float32),
"conversion_rate": spaces.Box(low=0.0, high=1.0, shape=(1,), dtype=np.float32),
"price_volatility": spaces.Box(low=0.0, high=1.0, shape=(1,), dtype=np.float32),
}
),
"cost": spaces.Box(
low=np.zeros((self.n,), dtype=np.float32),
high=np.full((self.n,), self.c.system_max_price, dtype=np.float32),
dtype=np.float32,
),
}
)
def _reset_catalogue(self) -> None:
self._costs = self._rng.uniform(15.0, 60.0, size=self.n).astype(np.float32)
margins = self._rng.uniform(0.2, 0.6, size=self.n).astype(np.float32)
self._refs = (self._costs * (1.0 + margins)).astype(np.float32)
self._prices = self._refs.copy()
def _observe_market(
self, prices: np.ndarray
) -> tuple[list[Session], Dict[str, float], np.ndarray, np.ndarray, float, float, int]:
sessions, demand_map = put_prices_to_market(
prices,
costs=self._costs,
alpha=self._alpha_true,
n_sessions=int(self.c.sessions_per_step),
seed=int(self._rng.integers(0, 2**31 - 1)),
)
demand_by_product = aggregate_demand_by_product(sessions, demand_map, self.n)
purchases, revenue, cost, n_agents = aggregate_purchases(sessions, self._costs, self.n)
conversion = float(np.sum(purchases) / max(len(sessions), 1))
return sessions, demand_map, demand_by_product, purchases, revenue, cost, n_agents
def _update_alpha_hat(self, sessions: list[Session]) -> float:
scores = [estimate_session_alpha(s) for s in sessions if s.events]
if not scores:
return self._alpha_hat
alpha_step = float(np.mean(scores))
self._alpha_hat = 0.8 * self._alpha_hat + 0.2 * alpha_step
self._alpha_hat = float(np.clip(self._alpha_hat, 0.0, 1.0))
return self._alpha_hat
def _reward(self, prices: np.ndarray, revenue: float, cost: float, volatility: float) -> float:
profit = float(revenue - cost)
coi_leak = float(self._last_coi.leak) if self._last_coi else 0.0
alpha_err = abs(self._alpha_hat - self._alpha_true)
return profit - self.c.coi_strength * coi_leak - self.c.w_volatility * volatility - self.c.w_estimation_error * alpha_err
def _build_obs(
self,
prices: np.ndarray,
demand_by_product: np.ndarray,
revenue: float,
conversion: float,
volatility: float,
) -> Dict[str, Any]:
return {
"elasticity": {"price": prices.astype(np.float32), "demand": demand_by_product.astype(np.float32)},
"market": {
"alpha_hat": np.array([self._alpha_hat], dtype=np.float32),
"revenue_rate": np.array([revenue], dtype=np.float32),
"conversion_rate": np.array([conversion], dtype=np.float32),
"price_volatility": np.array([volatility], dtype=np.float32),
},
"cost": self._costs.astype(np.float32),
}
def reset(self, seed: Optional[int] = None, options: Optional[dict] = None): def reset(self, seed: Optional[int] = None, options: Optional[dict] = None):
super().reset(seed=seed) super().reset(seed=seed)
if seed is not None: if seed is not None:
self._rng = np.random.default_rng(seed) self._rng = np.random.default_rng(seed)
self.commerce_platform._rng = np.random.default_rng(seed) self._t = 0
self.t = 0 self._alpha_true = float(np.clip(self.c.agent_share, *self.c.alpha_bounds))
init_prices = self._rng.uniform(low=60.0, high=140.0, size=(self.constraints.product_catelogue_size,)).astype(np.float32) self._alpha_hat = float(self.c.agent_share)
self._prev_prices = init_prices.copy() self._reset_catalogue()
self.state = { self._limbo = Limbo()
"elasticity": { self._last_sessions = []
"price": init_prices, self._last_coi = None
"demand": np.zeros((self.constraints.product_catelogue_size,), dtype=np.float32),
}
}
return self.state, {}
def step(self, action: np.ndarray): prices = self._prices if self._prices is not None else np.zeros(self.n, dtype=np.float32)
self.t += 1 obs = self._build_obs(prices, np.zeros(self.n, dtype=np.float32), 0.0, 0.0, 0.0)
base_prices = self.state["elasticity"]["price"].astype(np.float32) return obs, {"alpha_true": self._alpha_true}
new_prices = np.clip(base_prices * (1.0 + action.astype(np.float32)),
self.constraints.system_min_price,
self.constraints.system_max_price).astype(np.float32)
result = self.commerce_platform.run_pricing_simulation(new_prices)
if self.commerce_platform.use_defense: def step(self, action: np.ndarray) -> Tuple[Dict[str, Any], float, bool, bool, Dict[str, Any]]:
demand_est = result["q_hat_defended"] if self._prices is None:
internal_err = result["internal_error_defended"] raise RuntimeError("reset() must be called before step()")
else:
demand_est = result["q_hat_naive"]
internal_err = result["internal_error_naive"]
self.state["elasticity"]["price"] = new_prices prev = self._prices
self.state["elasticity"]["demand"] = demand_est prices = constrain_prices(
prev,
np.asarray(action, dtype=np.float32),
costs=self._costs,
min_price=float(self.c.system_min_price),
max_price=float(self.c.system_max_price),
max_adjustment=float(self.c.max_price_adjustment),
min_margin_pct=float(self.c.min_margin_pct),
)
self._prices = prices
self._limbo.add_update("prices", prices)
volatility = 0.0 if self._prev_prices is None else \ sessions, demand_map, demand_by_product, purchases, revenue, cost, n_agents = self._observe_market(prices)
float(np.mean(np.abs((new_prices - self._prev_prices) / (self._prev_prices + 1e-6)))) self._last_sessions = sessions
self._prev_prices = new_prices.copy() self._limbo.add_update("demand", demand_map)
revenue_observed = float(result["revenue_observed"]) self._update_alpha_hat(self._last_sessions)
agent_loss = float(result["agent_loss"]) self._last_coi = compute_coi_window(self._last_sessions, self._costs, demand_mapping=demand_map)
err_mean = float(np.mean(internal_err))
reward = (revenue_observed self._alpha_true = float(np.clip(self._alpha_true + self.c.alpha_drift, *self.c.alpha_bounds))
- self.constraints.w_agent_loss * agent_loss volatility = float(np.std((prices - prev) / (prev + 1e-6)))
- self.constraints.w_volatility * volatility reward = float(self._reward(prices, revenue, cost, volatility))
- self.constraints.w_estimation_error * err_mean) conversion = float(np.sum(purchases) / max(len(self._last_sessions), 1))
terminated = self.t >= self.constraints.episode_length self._t += 1
terminated = self._t >= int(self.c.max_steps)
obs = self._build_obs(prices, demand_by_product, revenue, conversion, min(volatility, 1.0))
info = { info = {
"t": self.t, "step": self._t,
"revenue_observed": revenue_observed, "reward": reward,
"revenue_oracle": float(result["revenue_oracle"]), "revenue": float(revenue),
"agent_loss": agent_loss, "profit": float(revenue - cost),
"ux_volatility": volatility, "n_sessions": int(self.c.sessions_per_step),
"mean_internal_error": err_mean, "n_agents": int(n_agents),
"look_to_book": float(result["interaction_features"].get("look_to_book", 0.0)), "alpha_true": float(self._alpha_true),
"mean_sale_price": float(result["interaction_features"].get("mean_sale_price", 0.0)), "alpha_hat": float(self._alpha_hat),
"true_human_purchases_total": float(np.sum(result["true_human_demand"])), "alpha_error": float(abs(self._alpha_hat - self._alpha_true)),
"true_agent_purchases_total": float(np.sum(result["true_agent_purchases"])), "price_std": float(np.std(prices)),
"price_volatility": float(volatility),
} }
return self.state, float(reward), terminated, False, info if self._last_coi is not None:
info.update(
{
"coi_policy": float(self._last_coi.policy),
"coi_agent": float(self._last_coi.agent),
"coi_leakage": float(self._last_coi.leak),
"coi_survival": float(self._last_coi.survival_ratio),
"coi_erosion": float(coi_erosion(self._last_coi.policy, self._last_coi.agent)),
}
)
return obs, reward, terminated, False, info
def render(self, mode: str = "human") -> str | None:
if self._prices is None:
return None
out = (
f"t={self._t}/{self.c.max_steps} "
f"alpha_true={self._alpha_true:.3f} alpha_hat={self._alpha_hat:.3f} "
f"price_std={float(np.std(self._prices)):.2f}"
)
if mode == "human":
print(out)
return out
if __name__ == "__main__": def close(self) -> None:
import matplotlib.pyplot as plt return
from collections import defaultdict
runs = {}
for use_defense in (False, True):
env = PHANTOMEnv(use_defense=use_defense)
obs, _ = env.reset(seed=42)
metrics = defaultdict(list)
total_reward = 0.0
done = False
while not done:
action = env.action_space.sample()
obs, reward, done, _, info = env.step(action)
total_reward += reward
p_mean = float(np.mean(obs["elasticity"]["price"]))
q_mean = float(np.mean(obs["elasticity"]["demand"]))
p_std = float(np.std(obs["elasticity"]["price"]))
metrics['t'].append(info['t'])
metrics['price_mean'].append(p_mean)
metrics['price_std'].append(p_std)
metrics['demand_mean'].append(q_mean)
metrics['revenue_observed'].append(info['revenue_observed'])
metrics['revenue_oracle'].append(info['revenue_oracle'])
metrics['agent_loss'].append(info['agent_loss'])
metrics['ux_volatility'].append(info['ux_volatility'])
metrics['look_to_book'].append(info['look_to_book'])
metrics['reward'].append(reward)
metrics['human_purchases'].append(info['true_human_purchases_total'])
metrics['agent_purchases'].append(info['true_agent_purchases_total'])
if info['t'] % 20 == 0 or done:
print(f"defense={'ON ' if use_defense else 'OFF'} t={info['t']:03d} p={p_mean:6.2f}±{p_std:4.2f} "
f"q={q_mean:6.2f} rev={info['revenue_observed']:7.2f} oracle={info['revenue_oracle']:7.2f} "
f"loss={info['agent_loss']:6.2f} ux={info['ux_volatility']:.3f} "
f"ltb={info['look_to_book']:5.2f} r={reward:7.2f}")
runs[use_defense] = metrics
print(f"defense={'ON ' if use_defense else 'OFF'} total_reward={total_reward:.2f}\n")
fig, axes = plt.subplots(3, 3, figsize=(15, 12))
fig.suptitle('PHANTOM Environment: Defense OFF vs ON', fontsize=14, fontweight='bold')
plot_configs = [
('price_mean', 'Mean Price', 'Price'),
('demand_mean', 'Mean Demand Estimate', 'Demand'),
('revenue_observed', 'Revenue (Observed)', 'Revenue'),
('agent_loss', 'Agent Loss (Oracle - Observed)', 'Loss'),
('ux_volatility', 'UX Volatility (Price Change)', 'Volatility'),
('look_to_book', 'Look-to-Book Ratio', 'Ratio'),
('reward', 'Step Reward', 'Reward'),
('human_purchases', 'Human Purchases', 'Count'),
('agent_purchases', 'Agent Purchases', 'Count'),
]
for idx, (key, title, ylabel) in enumerate(plot_configs):
ax = axes[idx // 3, idx % 3]
for use_defense, label, color in [(False, 'No Defense', 'red'), (True, 'With Defense', 'blue')]:
m = runs[use_defense]
ax.plot(m['t'], m[key], label=label, color=color, alpha=0.7, linewidth=1.5)
ax.set_xlabel('Step')
ax.set_ylabel(ylabel)
ax.set_title(title, fontsize=10, fontweight='bold')
ax.legend(loc='best', fontsize=8)
ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.savefig('phantom_env_comparison.png', dpi=150, bbox_inches='tight')
print("Plot saved to phantom_env_comparison.png")
plt.show()

View File

@@ -0,0 +1,11 @@
"""JAX-accelerated simulation core for PHANTOM environment."""
from .transitions import TransitionData, compile_transitions, fallback_transitions, JAX_AVAILABLE
from .simulation import SessionBatch, SimResult, sample_sessions, compute_metrics
from .features import session_features, compute_session_transitions
from .separability import compute_divergences, estimate_alpha_batch
__all__ = [
"JAX_AVAILABLE", "TransitionData", "compile_transitions", "fallback_transitions",
"SessionBatch", "SimResult", "sample_sessions", "compute_metrics",
"session_features", "compute_session_transitions", "compute_divergences", "estimate_alpha_batch",
]

View File

@@ -0,0 +1,69 @@
"""Vectorized session feature extraction."""
import numpy as np
from .transitions import N_STATES, PURCHASE_IDX, CART_IDX
from .simulation import SessionBatch
try:
import jax.numpy as jnp
from jax import jit
JAX_AVAILABLE = True
except ImportError:
jnp, JAX_AVAILABLE = np, False
def jit(f): return f
@jit
def extract_features(states, dwells, lengths):
    """Build a (n_sess, 9) feature matrix from padded session arrays.

    Columns, in order: total dwell, mean dwell per step, step count, steps per
    unit dwell, and visit counts for state indices 1, 3, 4 and 2, followed by
    the ratio of index-4 visits to index-1 visits. Only positions before each
    session's length contribute.
    """
    _, horizon = states.shape
    in_session = jnp.arange(horizon)[None, :] < lengths[:, None]

    def visits(state_idx):
        # Number of in-session steps spent in the given state, as float32.
        hits = (states == state_idx) & in_session
        return jnp.sum(hits, axis=1).astype(jnp.float32)

    n_views, n_learn, n_cart, n_buy = visits(1), visits(2), visits(3), visits(4)
    n_steps = lengths.astype(jnp.float32)
    elapsed = jnp.sum(dwells * in_session, axis=1)
    # The 1e-6 terms keep every ratio finite for empty or zero-duration sessions.
    columns = [
        elapsed,
        elapsed / (n_steps + 1e-6),
        n_steps,
        n_steps / (elapsed + 1e-6),
        n_views,
        n_cart,
        n_buy,
        n_learn,
        n_buy / (n_views + 1e-6),
    ]
    return jnp.stack(columns, axis=1)
def session_features(batch: SessionBatch) -> np.ndarray:
    """Return the (n_sess, 9) feature matrix for a session batch as a numpy array.

    Delegates to the jitted `extract_features` when JAX is available; otherwise
    the same nine columns are computed with plain numpy.
    """
    if JAX_AVAILABLE:
        device_args = (jnp.array(batch.states), jnp.array(batch.dwells), jnp.array(batch.lengths))
        return np.asarray(extract_features(*device_args))

    # numpy fallback
    states, lengths = batch.states, batch.lengths
    _, horizon = states.shape
    in_session = np.arange(horizon)[None, :] < lengths[:, None]
    elapsed = np.sum(batch.dwells * in_session, axis=1)
    n_steps = lengths.astype(np.float32)

    def visits(state_idx):
        # In-session visit count of one state index per session.
        return np.sum((states == state_idx) & in_session, axis=1).astype(np.float32)

    n_views, n_learn, n_cart, n_buy = visits(1), visits(2), visits(3), visits(4)
    columns = [
        elapsed,
        elapsed / (n_steps + 1e-6),
        n_steps,
        n_steps / (elapsed + 1e-6),
        n_views,
        n_cart,
        n_buy,
        n_learn,
        n_buy / (n_views + 1e-6),
    ]
    return np.stack(columns, axis=1)
@jit
def session_transitions(states, lengths, n_states=N_STATES):
    """Compute row-normalised empirical transition matrices per session.

    Args:
        states: (n_sess, max_len) integer state indices, negative values mark padding.
        lengths: (n_sess,) number of recorded steps per session.
        n_states: size of the state space (must be a concrete Python int).

    Returns:
        (n_sess, n_states, n_states) array. Each row with at least one observed
        transition sums to ~1; rows without observations stay all-zero.
    """
    _, max_len = states.shape
    # Transition t goes from step t to step t+1, so a session of length L has L-1 of them.
    within_length = jnp.arange(max_len - 1)[None, :] < (lengths[:, None] - 1)
    src, dst = states[:, :-1], states[:, 1:]
    valid = (within_length & (src >= 0) & (dst >= 0)).astype(jnp.float32)

    # Clamp padding (-1) into range so the one-hot lookup is well defined; the
    # `valid` weight below removes those entries from the counts again.
    eye = jnp.eye(n_states, dtype=jnp.float32)
    src_onehot = eye[jnp.clip(src, 0, n_states - 1)]  # (n_sess, max_len-1, n_states)
    dst_onehot = eye[jnp.clip(dst, 0, n_states - 1)]  # (n_sess, max_len-1, n_states)

    # The mask is applied per transition *before* reducing over time, so padded
    # or out-of-session pairs contribute nothing to the counts.
    counts = jnp.einsum('nt,nti,ntj->nij', valid, src_onehot, dst_onehot)
    row_sums = counts.sum(axis=-1, keepdims=True)
    return counts / (row_sums + 1e-10)
def compute_session_transitions(batch: SessionBatch) -> np.ndarray:
    """Return per-session row-normalised transition matrices as a numpy array.

    Uses the jitted `session_transitions` when JAX is available, otherwise
    counts consecutive state pairs with an explicit loop.
    """
    if JAX_AVAILABLE:
        jax_result = session_transitions(jnp.array(batch.states), jnp.array(batch.lengths))
        return np.asarray(jax_result)

    # numpy fallback
    n_sessions, _ = batch.states.shape
    counts = np.zeros((n_sessions, N_STATES, N_STATES), dtype=np.float32)
    for sess_idx in range(n_sessions):
        row = batch.states[sess_idx]
        for step in range(batch.lengths[sess_idx] - 1):
            src, dst = row[step], row[step + 1]
            # Negative indices are padding and never count as a transition.
            if src >= 0 and dst >= 0:
                counts[sess_idx, src, dst] += 1
    totals = counts.sum(axis=-1, keepdims=True)
    return counts / (totals + 1e-10)

View File

@@ -0,0 +1,43 @@
"""Vectorized KL divergence for separability scoring."""
import numpy as np
from typing import Tuple
try:
import jax.numpy as jnp
from jax import jit
JAX_AVAILABLE = True
except ImportError:
jnp, JAX_AVAILABLE = np, False
def jit(f): return f
@jit
def batch_kl(P, Q_human, Q_agent, eps=1e-10):
    """KL(P || Q) of every batched matrix in P against two reference matrices.

    P has shape (n, s, s) and each reference has shape (s, s). P is smoothed
    with `eps` and renormalised along the last axis; the references are only
    smoothed. Returns (delta_h, delta_a), each of shape (n,), summed over both
    matrix axes.
    """
    smoothed = P + eps
    smoothed = smoothed / smoothed.sum(axis=-1, keepdims=True)

    def divergence_from(reference):
        # Broadcast the single reference matrix across the batch dimension.
        ref = reference[None] + eps
        return jnp.sum(smoothed * jnp.log(smoothed / ref), axis=(1, 2))

    return divergence_from(Q_human), divergence_from(Q_agent)
def compute_divergences(session_trans: np.ndarray, ref_human: np.ndarray, ref_agent: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """KL divergence of each session's transition matrix from the human and agent references.

    Returns a pair (delta_h, delta_a) of numpy arrays with one entry per session.
    """
    if JAX_AVAILABLE:
        kl_h, kl_a = batch_kl(jnp.array(session_trans), jnp.array(ref_human), jnp.array(ref_agent))
        return np.asarray(kl_h), np.asarray(kl_a)

    # numpy fallback mirroring batch_kl
    tiny = 1e-10
    rows = session_trans + tiny
    rows = rows / rows.sum(axis=-1, keepdims=True)

    def kl_to(reference):
        # Reference is smoothed but, like in batch_kl, not renormalised.
        return np.sum(rows * np.log(rows / (reference[None] + tiny)), axis=(1, 2))

    return kl_to(ref_human), kl_to(ref_agent)
def estimate_alpha_batch(prob_agent: np.ndarray, delta_h: np.ndarray, delta_a: np.ndarray, temp: float = 1.0) -> np.ndarray:
    """Vectorized alpha estimation from classifier probs and divergences.

    Args:
        prob_agent: per-session classifier probability.
        delta_h: per-session divergence from the human reference.
        delta_a: per-session divergence from the agent reference.
        temp: sigmoid temperature; values <= 0 skip the sigmoid entirely.

    Returns:
        Array of values clipped to [0, 1]. The classifier probability and the
        divergence ratio `delta_a / (delta_h + delta_a)` are averaged with equal
        weight; when the combined divergence is <= 1e-8 the ratio defaults to 0.5.
    """
    # NOTE(review): the ratio grows with delta_a, i.e. with distance from the
    # agent reference -- confirm this is the intended direction.
    mass = delta_h + delta_a
    # np.where evaluates both branches, so the quotient is also computed where
    # mass is ~0. Silence the resulting divide/invalid warnings; those entries
    # are replaced by 0.5 anyway.
    with np.errstate(divide='ignore', invalid='ignore'):
        ratio = np.where(mass > 1e-8, delta_a / mass, 0.5)
    blended = 0.5 * prob_agent + 0.5 * ratio
    if temp <= 0:
        return np.clip(blended, 0.0, 1.0)
    return np.clip(1.0 / (1.0 + np.exp(-temp * (blended - 0.5))), 0.0, 1.0)

View File

@@ -0,0 +1,116 @@
"""Vectorized Markov chain session sampling with JAX."""
from typing import NamedTuple, Tuple
import numpy as np
from functools import partial
try:
import jax, jax.numpy as jnp
from jax import lax
JAX_AVAILABLE = True
except ImportError:
JAX_AVAILABLE = False
from .transitions import TransitionData, N_STATES, TERM_IDX, PURCHASE_IDX, CART_IDX
class SessionBatch(NamedTuple):
    """Batch of sampled sessions stored as parallel, padded arrays (one row/entry per session)."""
    states: np.ndarray  # (n_sess, max_len) state indices, -1=padding
    dwells: np.ndarray  # (n_sess, max_len) dwell times
    products: np.ndarray  # (n_sess,) product index per session
    actors: np.ndarray  # (n_sess,) 0=human, 1=agent
    lengths: np.ndarray  # (n_sess,) actual session length
class SimResult(NamedTuple):
    """Aggregate metrics produced by `compute_metrics` for one session batch."""
    # Per-product count of purchasing sessions by humans / agents (float32, one entry per price).
    demand_human: np.ndarray
    demand_agent: np.ndarray
    # Sum of the offered price over all purchasing sessions.
    revenue: float
    # Sum of the base price over the same purchasing sessions.
    revenue_oracle: float
    # Sum of (base price - offered price) over agent purchases.
    agent_loss: float
    # max(0, mean margin - 0.5 * mean(base price - offered price)) over human purchases; 0.0 if there are none.
    coi: float
    # Count of recorded visits to state index 1 divided by the number of purchasing sessions (+1e-6).
    look_to_book: float
    # Mean offered price over purchasing sessions, 0.0 when nothing was purchased.
    mean_sale_price: float
    # Number of purchasing sessions per actor type.
    n_human_purchases: int
    n_agent_purchases: int
    # The batch the metrics were computed from.
    sessions: SessionBatch
if JAX_AVAILABLE:
    # n_human, n_agent and max_steps (positions 5-7) are static because they determine array shapes.
    @partial(jax.jit, static_argnums=(5,6,7))
    def _sample_sessions_jax(key, T_human, T_agent, dwell_human, dwell_agent, n_human, n_agent, max_steps):
        """Sample n_human + n_agent Markov-chain sessions in parallel with lax.scan.

        Returns (states, dwells, actors, lengths): states/dwells have shape
        (n, max_steps), actors is 0 for the first n_human sessions and 1 for the
        rest, and lengths is the per-session step count.
        """
        n = n_human + n_agent
        # Only k3 is consumed below; k1/k2 are shadowed inside `step` and k4 is unused.
        k1, k2, k3, k4 = jax.random.split(key, 4)
        actors = jnp.concatenate([jnp.zeros(n_human, dtype=jnp.int32), jnp.ones(n_agent, dtype=jnp.int32)])
        # Select the per-session transition matrix / dwell parameters by actor type.
        T = jnp.where(actors[:,None,None]==0, T_human[None], T_agent[None]) # (n,6,6)
        dwell_p = jnp.where(actors[:,None,None]==0, dwell_human[None], dwell_agent[None]) # (n,6,2)
        def step(carry, _):
            """Advance every session by one transition; emits (next state, dwell)."""
            s, active, k = carry
            k, k1, k2 = jax.random.split(k, 3)
            probs = T[jnp.arange(n), s] # (n,6)
            # 1e-10 keeps log() finite for zero-probability transitions.
            nxt = jax.random.categorical(k1, jnp.log(probs + 1e-10))
            # Sessions that already finished emit -1 (padding) instead of a sampled state.
            nxt = jnp.where(active, nxt, -1)
            # Dwell parameters are looked up for the state being left (s), not the one reached.
            shape = dwell_p[jnp.arange(n), s, 0]
            scale = dwell_p[jnp.arange(n), s, 1]
            dwell = jnp.maximum(0.3, jax.random.gamma(k2, shape) * scale)
            # A session stays active until it reaches the terminal state.
            still = active & (nxt != TERM_IDX) & (nxt >= 0)
            return (nxt, still, k), (nxt, dwell)
        # Every session starts in state 0 and is active.
        init = (jnp.zeros(n, dtype=jnp.int32), jnp.ones(n, dtype=jnp.bool_), k3)
        _, (states, dwells) = lax.scan(step, init, None, length=max_steps)
        states, dwells = states.T, dwells.T # (n, max_steps)
        is_term = (states == -1) | (states == TERM_IDX)
        # Length = position of the first terminal/padding entry + 1, so that entry is counted.
        lengths = jnp.argmax(is_term, axis=1) + 1
        # Sessions that never terminated within max_steps get the full length.
        lengths = jnp.where(jnp.any(is_term, axis=1), lengths, max_steps)
        return states, dwells, actors, lengths
def sample_sessions(key, trans: TransitionData, n_human: int, n_agent: int, n_products: int, max_steps: int = 40) -> SessionBatch:
    """Sample a batch of human and agent sessions from the transition model.

    Args:
        key: JAX PRNG key when JAX is available. In the numpy fallback any
            numeric scalar or sequence is accepted and its first element is used
            as the seed; anything else falls back to seed 42.
        trans: transition matrices and dwell parameters for both actor types.
        n_human: number of human sessions (placed first in the batch).
        n_agent: number of agent sessions.
        n_products: products are drawn uniformly from [0, n_products).
        max_steps: maximum number of recorded steps per session.

    Returns:
        SessionBatch of numpy arrays.
    """
    if JAX_AVAILABLE:
        k1, k2 = jax.random.split(key)
        states, dwells, actors, lengths = _sample_sessions_jax(
            k1, trans.human_T, trans.agent_T, trans.human_dwell, trans.agent_dwell, n_human, n_agent, max_steps)
        products = jax.random.randint(k2, (n_human + n_agent,), 0, n_products)
        return SessionBatch(np.asarray(states), np.asarray(dwells), np.asarray(products), np.asarray(actors), np.asarray(lengths))

    # numpy fallback
    # Derive the seed from the key's first element. Plain integer seeds are
    # honoured too instead of silently collapsing to the default.
    seed = 42
    if key is not None:
        flat_key = np.asarray(key).ravel()
        if flat_key.size and np.issubdtype(flat_key.dtype, np.number):
            seed = int(flat_key[0])
    rng = np.random.default_rng(seed)

    n = n_human + n_agent
    actors = np.concatenate([np.zeros(n_human, dtype=np.int32), np.ones(n_agent, dtype=np.int32)])
    products = rng.integers(0, n_products, size=n)
    states, dwells = np.full((n, max_steps), -1, dtype=np.int32), np.zeros((n, max_steps), dtype=np.float32)
    lengths = np.zeros(n, dtype=np.int32)
    for i in range(n):
        T = trans.human_T if actors[i] == 0 else trans.agent_T
        dp = trans.human_dwell if actors[i] == 0 else trans.agent_dwell
        s, t = 0, 0
        # Record the current state and its dwell, then transition; the terminal
        # state itself is never written, so it is not part of the length.
        while t < max_steps and s != TERM_IDX:
            states[i, t] = s
            dwells[i, t] = max(0.3, rng.gamma(dp[s, 0], dp[s, 1]))
            s = rng.choice(N_STATES, p=T[s])
            t += 1
        lengths[i] = t
    return SessionBatch(states, dwells, products, actors, lengths)
def compute_metrics(batch: SessionBatch, prices: np.ndarray, unit_cost: np.ndarray, base_price: np.ndarray) -> SimResult:
    """Aggregate demand, revenue and diagnostic metrics for a sampled batch.

    A session counts as a purchase when PURCHASE_IDX appears anywhere in its
    state row; the purchased product is the session's assigned product.
    `prices`, `unit_cost` and `base_price` are indexed by product.
    """
    n_items = len(prices)
    bought = np.any(batch.states == PURCHASE_IDX, axis=1)
    bought_by_human = bought & (batch.actors == 0)
    bought_by_agent = bought & (batch.actors == 1)

    sold_items = batch.products[bought]
    human_items = batch.products[bought_by_human]
    agent_items = batch.products[bought_by_agent]

    # per-product demand split by actor type
    demand_h = np.bincount(human_items, minlength=n_items).astype(np.float32)
    demand_a = np.bincount(agent_items, minlength=n_items).astype(np.float32)

    # revenue at offered prices vs. at base prices
    revenue = float(np.sum(prices[sold_items]))
    revenue_oracle = float(np.sum(base_price[sold_items]))

    # agent loss: base price minus price paid, summed over agent purchases
    agent_loss = float(np.sum(base_price[agent_items] - prices[agent_items]))

    # COI: mean margin minus half the mean (base - price) gap on human purchases, floored at 0
    coi = 0.0
    if len(human_items) > 0:
        margin = float(np.mean(prices[human_items] - unit_cost[human_items]))
        premium = float(np.mean(base_price[human_items] - prices[human_items]))
        coi = max(0.0, margin - premium * 0.5)

    # look to book: view_item_page visits (index 1) per purchasing session
    n_sold = int(bought.sum())
    views = float(np.sum(batch.states == 1))
    look_to_book = views / (n_sold + 1e-6)
    mean_sale = float(np.mean(prices[sold_items])) if n_sold > 0 else 0.0

    return SimResult(
        demand_human=demand_h,
        demand_agent=demand_a,
        revenue=revenue,
        revenue_oracle=revenue_oracle,
        agent_loss=agent_loss,
        coi=coi,
        look_to_book=look_to_book,
        mean_sale_price=mean_sale,
        n_human_purchases=int(bought_by_human.sum()),
        n_agent_purchases=int(bought_by_agent.sum()),
        sessions=batch,
    )

View File

@@ -0,0 +1,47 @@
"""Dense transition matrices for JAX Markov chain sampling."""
from dataclasses import dataclass
import numpy as np
try:
import jax.numpy as jnp
JAX_AVAILABLE = True
except ImportError:
jnp, JAX_AVAILABLE = np, False
# Canonical state order: a state's position in this list is its row/column index in the dense matrices.
STATES = ["session_start", "view_item_page", "learn_more_about_item", "add_item_to_cart", "purchase_complete", "session_end"]
# Reverse lookup from state name to its index in STATES.
S2I = {s: i for i, s in enumerate(STATES)}
# The three *_IDX values are hard-coded positions of session_end, purchase_complete and
# add_item_to_cart in STATES; keep them in sync if the list is ever reordered.
N_STATES, TERM_IDX, PURCHASE_IDX, CART_IDX = len(STATES), 5, 4, 3
@dataclass
class TransitionData:
    """Dense transition matrices and gamma dwell parameters for both actor types."""
    human_T: np.ndarray  # (6,6) transition probs
    agent_T: np.ndarray  # (6,6)
    human_dwell: np.ndarray  # (6,2) shape,scale
    agent_dwell: np.ndarray  # (6,2)

    def to_jax(self):
        """Return a copy whose fields are JAX arrays, or `self` unchanged when JAX is unavailable."""
        if JAX_AVAILABLE:
            return TransitionData(
                human_T=jnp.array(self.human_T),
                agent_T=jnp.array(self.agent_T),
                human_dwell=jnp.array(self.human_dwell),
                agent_dwell=jnp.array(self.agent_dwell),
            )
        return self
def dict_to_dense(d):
    """Convert a nested {src: {dst: prob}} mapping into a dense row-stochastic matrix.

    State names that are not in S2I are ignored. Rows are normalised to sum to
    1. A state with no (or numerically negligible) outgoing probability cannot
    be sampled from, so its row is routed to the terminal state. The terminal
    row is always absorbing.

    Returns:
        (N_STATES, N_STATES) float32 array.
    """
    m = np.zeros((N_STATES, N_STATES), dtype=np.float32)
    for src, dsts in d.items():
        i = S2I.get(src)
        if i is None:
            continue
        for dst, p in dsts.items():
            j = S2I.get(dst)
            if j is not None:
                m[i, j] = p
    row_mass = m.sum(1, keepdims=True)
    m /= np.maximum(row_mass, 1e-8)
    # Dead-end states would otherwise keep an all-zero (invalid) distribution;
    # end the session from there instead.
    dead_end = row_mass[:, 0] < 1e-8
    m[dead_end] = 0
    m[dead_end, TERM_IDX] = 1.0
    # session_end is absorbing regardless of what the profile says.
    m[TERM_IDX] = 0
    m[TERM_IDX, TERM_IDX] = 1.0
    return m
def compile_transitions(human_profile, agent_profile):
    """Build TransitionData from two profiles exposing `.transitions` and `.dwell_params`.

    States missing from a profile's dwell parameters default to (2.0, 1.0).
    """
    def dwell_arr(params):
        # One (shape, scale) pair per state, in STATES order.
        pairs = [params.get(state, (2.0, 1.0)) for state in STATES]
        return np.array([pairs], dtype=np.float32).reshape(N_STATES, 2)

    human_T = dict_to_dense(human_profile.transitions)
    agent_T = dict_to_dense(agent_profile.transitions)
    human_dwell = dwell_arr(human_profile.dwell_params)
    agent_dwell = dwell_arr(agent_profile.dwell_params)
    return TransitionData(human_T, agent_T, human_dwell, agent_dwell)
def fallback_transitions():
    """Return hand-written default TransitionData for both actor types.

    Both actors share the same dwell parameters, (2.0, 1.0) for every state.
    """
    human = {
        "session_start": {"view_item_page": 0.85, "session_end": 0.15},
        "view_item_page": {"learn_more_about_item": 0.4, "add_item_to_cart": 0.3, "view_item_page": 0.2, "session_end": 0.1},
        "learn_more_about_item": {"add_item_to_cart": 0.5, "view_item_page": 0.3, "session_end": 0.2},
        "add_item_to_cart": {"purchase_complete": 0.6, "view_item_page": 0.25, "session_end": 0.15},
        "purchase_complete": {"session_end": 1.0},
    }
    agent = {
        "session_start": {"view_item_page": 0.9, "session_end": 0.1},
        "view_item_page": {"learn_more_about_item": 0.5, "add_item_to_cart": 0.25, "view_item_page": 0.15, "session_end": 0.1},
        "learn_more_about_item": {"add_item_to_cart": 0.4, "view_item_page": 0.4, "session_end": 0.2},
        "add_item_to_cart": {"purchase_complete": 0.5, "view_item_page": 0.3, "session_end": 0.2},
        "purchase_complete": {"session_end": 1.0},
    }
    default_dwell = np.full((N_STATES, 2), [2.0, 1.0], dtype=np.float32)
    # Separate copies so the two actor types never share a dwell buffer.
    return TransitionData(dict_to_dense(human), dict_to_dense(agent), default_dwell.copy(), default_dwell.copy())

175
sim/rl/train.py Normal file
View File

@@ -0,0 +1,175 @@
import numpy as np
import logging
from pathlib import Path
from typing import Dict, Type, Optional
import pickle
from torch.utils.tensorboard import SummaryWriter
from sim.rl.environment import PHANTOMEnv, BusinessLogicConstraints
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] %(message)s')
logger = logging.getLogger(__name__)
try:
from sim.rl.engine import (BasePricingEngine, WildPricingEngine, StaticPricingEngine,
SimpleDemandEngine, RandomWalkEngine, ThompsonSamplingEngine)
except ImportError as e:
BasePricingEngine = None # engines not required for basic usage
print(e)
"""
Target training loop:
have base prices p0 from env reset and run the env step, collect reward and metrics
pass this to the pricing engine which computes the price action to take based on previous reward by learning
the new action gets passed to the step
so we alternate, step -> reward -> engine (produces price delta) -> step with price delta -> reward
to make sure the reinforcement learning inside the engine can learn we need to have trajectory of prices
CURRENT SOLUTION BELOW does not implement correct learning or updates.
"""
class EngineTrainer:
    """wrapper to run pricing engines through episodes and collect metrics"""

    def __init__(self, engine, env: PHANTOMEnv, tb_writer: Optional[SummaryWriter] = None):
        """Store the engine/env pair and an optional TensorBoard writer for scalar logging."""
        self.engine = engine
        self.env = env
        self.episode_metrics = []  # one dict per training episode (last step info + episode totals)
        self.tb_writer = tb_writer
        self.global_step = 0  # step counter across all training episodes, used as the TensorBoard x-axis

    def train(self, n_episodes: int, seed: int = 42):
        """Run `n_episodes` training episodes, updating the engine after every step.

        Episode `ep` resets the environment with `seed + ep`. Returns `self`.
        """
        for ep in range(n_episodes):
            obs, _ = self.env.reset(seed=seed + ep)
            self.engine.reset()
            done = False
            prev_prices = obs["elasticity"]["price"]
            episode_reward = 0.0
            last_info: Dict[str, float] = {}
            while not done:
                action_prices = self.engine.compute_prices(prev_prices, obs)
                obs, reward, terminated, truncated, info = self.env.step(action_prices)
                # An episode is over when it terminates OR is truncated; ignoring
                # the truncation flag would keep stepping a finished episode.
                done = bool(terminated or truncated)
                self.engine.update(obs, reward, done, info)
                episode_reward += reward
                prev_prices = obs["elasticity"]["price"]
                last_info = info
                if self.tb_writer:
                    self.tb_writer.add_scalar("reward/step", reward, self.global_step)
                    if "coi" in info:
                        self.tb_writer.add_scalar("diagnostics/coi", info["coi"], self.global_step)
                    if "alpha_hat" in info:
                        self.tb_writer.add_scalar("diagnostics/alpha_hat", info["alpha_hat"], self.global_step)
                self.global_step += 1
            # Copy before annotating so the env's own info dict is not mutated.
            last_info = dict(last_info)
            last_info.update({"episode_reward": episode_reward, "episode": ep})
            self.episode_metrics.append(last_info)
            if self.tb_writer:
                self.tb_writer.add_scalar("reward/episode", episode_reward, ep)
        return self

    def run_episode(self, seed: int = 42) -> Dict:
        """run single evaluation episode and return metrics

        The engine is not updated. The returned dict holds the info values of
        the final step plus the summed reward under 'total_reward'.
        """
        obs, _ = self.env.reset(seed=seed)
        self.engine.reset()
        total_reward = 0.0
        prev_prices = obs["elasticity"]["price"]
        ep_metrics = {'total_reward': 0.0}
        done = False
        while not done:
            action_prices = self.engine.compute_prices(prev_prices, obs)
            obs, reward, terminated, truncated, info = self.env.step(action_prices)
            done = bool(terminated or truncated)
            total_reward += reward
            # Later steps overwrite earlier ones, leaving the last step's values.
            for k, v in info.items():
                ep_metrics[k] = v
            prev_prices = obs["elasticity"]["price"]
        ep_metrics['total_reward'] = total_reward
        return ep_metrics

    def evaluate(self, n_episodes: int = 10, seed: int = 100) -> Dict:
        """evaluate trained engine

        Runs `n_episodes` evaluation episodes (seeds `seed`, `seed + 1`, ...) and
        returns {metric: (mean, std)}; metrics missing from an episode count as 0.0.
        """
        results = {k: [] for k in ['total_reward', 'revenue_observed', 'revenue_oracle',
                                   'agent_loss', 'ux_volatility', 'look_to_book']}
        for ep in range(n_episodes):
            metrics = self.run_episode(seed=seed + ep)
            for k in results:
                results[k].append(metrics.get(k, 0.0))
        return {k: (np.mean(v), np.std(v)) for k, v in results.items()}
def make_env():
    """Create a PHANTOMEnv configured with default-constructed BusinessLogicConstraints."""
    return PHANTOMEnv(constraints=BusinessLogicConstraints())
def train_engine(engine_cls, env: PHANTOMEnv, n_episodes: int, seed: int = 42,
                 tb_writer: Optional[SummaryWriter] = None) -> EngineTrainer:
    """Instantiate `engine_cls` with the env's constraints, train it and return the trainer.

    The same `seed` is used for the engine and as the base seed of the episodes.
    """
    pricing_engine = engine_cls(constraints=env.constraints, seed=seed)
    runner = EngineTrainer(pricing_engine, env, tb_writer=tb_writer)
    runner.train(n_episodes, seed=seed)
    return runner
def save_trainer(trainer: EngineTrainer, path: Path):
    """Pickle the trainer's engine and episode metrics to `path`, creating parent directories."""
    snapshot = {'engine': trainer.engine, 'metrics': trainer.episode_metrics}
    path.parent.mkdir(parents=True, exist_ok=True)
    with open(path, 'wb') as sink:
        pickle.dump(snapshot, sink)
    logger.info(f"Saved trainer to {path}")
def load_trainer(path: Path, env: PHANTOMEnv, tb_writer: Optional[SummaryWriter] = None) -> EngineTrainer:
    """Rebuild an EngineTrainer around `env` from a file written by `save_trainer`."""
    # NOTE(review): pickle executes arbitrary code on load -- only open trainer
    # files produced by this project.
    with open(path, 'rb') as source:
        saved = pickle.load(source)
    restored = EngineTrainer(saved['engine'], env, tb_writer=tb_writer)
    restored.episode_metrics = saved['metrics']
    return restored
if __name__ == "__main__":
    # Train every available engine, save it, then evaluate all of them.
    if BasePricingEngine is None:
        logger.error("Engines not available, cannot run training")
        # SystemExit works even where the site-provided exit() helper is absent.
        raise SystemExit(1)
    base_dir = Path("./sim/rl/runs")
    # parents=True so the script also works when the intermediate directories do not exist yet.
    base_dir.mkdir(parents=True, exist_ok=True)
    engines = {
        "Wild": WildPricingEngine,
        "Static": StaticPricingEngine,
        "RandomWalk": RandomWalkEngine,
        "ThompsonSampling": ThompsonSamplingEngine,
    }
    n_train_episodes = 50
    n_eval_episodes = 10
    seed = 42
    logger.info(f"Training config: {n_train_episodes} episodes per engine")
    trained_trainers = {}
    for engine_name, engine_cls in engines.items():
        run_name = engine_name
        log_dir = base_dir / run_name
        log_dir.mkdir(parents=True, exist_ok=True)
        logger.info(f"Training {engine_name}")
        logger.info(f"Log directory: {log_dir}")
        env = make_env()
        tb_writer = SummaryWriter(log_dir=str(log_dir))
        try:
            trainer = train_engine(engine_cls, env, n_train_episodes, seed, tb_writer=tb_writer)
        finally:
            # Flush and release the event file even if training raises.
            tb_writer.close()
        save_path = log_dir / "trainer.pkl"
        save_trainer(trainer, save_path)
        trained_trainers[run_name] = (trainer, env)
    logger.info("Starting evaluation")
    for run_name, (trainer, env) in trained_trainers.items():
        logger.info(f"Evaluating {run_name}")
        # Offset the seed so evaluation episodes differ from the training ones.
        results = trainer.evaluate(n_episodes=n_eval_episodes, seed=seed + 1000)
        for metric, (mean, std) in results.items():
            logger.info(f" {metric:20s}: {mean:10.2f} ± {std:6.2f}")
    logger.info(f"Results saved to: {base_dir}")

108
sim/strong_learner/data.py Normal file
View File

@@ -0,0 +1,108 @@
import os
import requests
try:
import py7zr # type: ignore
except ImportError: # pragma: no cover - optional dependency
py7zr = None
import pandas as pd
from typing import Generator
try:
from sim.rl.behavior_loader.loader import PayloadModel, ValueModel, InteractionModel, Loader
except ImportError:
from loader import PayloadModel, ValueModel, InteractionModel, Loader
class YooChooseLoader(Loader):
    """Loader that exposes the YooChoose click/buy logs as InteractionModel sessions.

    On construction the dataset is downloaded and unpacked if the click file is
    missing, then up to `max_sessions` sessions are read into memory.
    """
    URL = "https://s3-eu-west-1.amazonaws.com/yc-rdata/yoochoose-data.7z"
    CLICK_COLS = ['session_id', 'ts', 'item_id', 'category']
    BUY_COLS = ['session_id', 'ts', 'item_id', 'price', 'quantity']

    def __init__(self, root_dir: str = "data/yoochoose", chunk_size: int = 500_000, max_sessions: int = 1000):
        """Locate (or fetch) the dataset under `root_dir` and load the sessions."""
        self.root = root_dir
        self.chunk_size = chunk_size
        self.max_sessions = max_sessions
        self.click_path = f"{root_dir}/yoochoose-clicks.dat"
        self.buy_path = f"{root_dir}/yoochoose-buys.dat"
        if not os.path.exists(self.click_path): self._setup()
        self.data = self._load_sessions(max_sessions)
        self.entries = list(self.data.keys())

    def _setup(self):
        """Download the 7z archive, extract it into `self.root` and delete the archive.

        Raises:
            RuntimeError: if py7zr is not installed.
            requests.HTTPError: if the download responds with an error status.
        """
        if py7zr is None:
            raise RuntimeError("py7zr is required to unpack YooChoose dataset. Install py7zr first.")
        os.makedirs(self.root, exist_ok=True)
        zip_path = f"{self.root}/temp.7z"
        try:
            with requests.get(self.URL, stream=True) as r:
                # Fail here rather than writing an HTML error page into the archive file.
                r.raise_for_status()
                with open(zip_path, 'wb') as f:
                    for chunk in r.iter_content(8192):
                        f.write(chunk)
            with py7zr.SevenZipFile(zip_path, 'r') as z:
                z.extractall(self.root)
        finally:
            # Never leave a partial or already-extracted archive behind.
            if os.path.exists(zip_path):
                os.remove(zip_path)

    def _make_interaction(self, sid: str, ts: str, item_id: str, event: str, page: str, meta: dict) -> InteractionModel:
        """Wrap one dataset row in the InteractionModel envelope with placeholder transport fields."""
        payload = PayloadModel(
            sessionId=sid, experimentId=None, eventName=event,
            page=page, productId=item_id, metadata=meta,
            storeMode="yoochoose", userAgent="dataset", ts=ts
        )
        return InteractionModel(
            partitionID=0, offset=0, timestamp=0, compression="",
            isTransactional=False, headers=[], key={},
            value=ValueModel(payload=payload, encoding="json", isPayloadNull=False, schemaId=1, size=0)
        )

    def _parse_category(self, cat) -> str:
        """Map a raw category value to a page label.

        Missing values and 0 become "unknown", "S" becomes "special_offer",
        1..12 become "category_<n>" and any other integer "brand_<n>". The value
        may arrive as a string or as a number depending on how pandas inferred
        the column for the chunk, so both forms are handled. Non-numeric values
        are returned as their string form.
        """
        if pd.isna(cat):
            return "unknown"
        if isinstance(cat, str):
            cat = cat.strip()
            if cat == "S":
                return "special_offer"
        try:
            n = int(cat)
        except (TypeError, ValueError):
            return str(cat)
        if n == 0:
            return "unknown"
        return f"category_{n}" if 1 <= n <= 12 else f"brand_{n}"

    def stream_clicks(self) -> Generator[InteractionModel, None, None]:
        """Yield one "view_item_page" interaction per row of the clicks file, read in chunks."""
        with pd.read_csv(self.click_path, names=self.CLICK_COLS, chunksize=self.chunk_size, header=None) as reader:
            for chunk in reader:
                for r in chunk.itertuples(index=False):
                    yield self._make_interaction(
                        str(r.session_id), r.ts, str(r.item_id),
                        "view_item_page", self._parse_category(r.category), {}
                    )

    def stream_buys(self) -> Generator[InteractionModel, None, None]:
        """Yield one "purchase_complete" interaction per row of the buys file, read in chunks."""
        with pd.read_csv(self.buy_path, names=self.BUY_COLS, chunksize=self.chunk_size, header=None) as reader:
            for chunk in reader:
                for r in chunk.itertuples(index=False):
                    yield self._make_interaction(
                        str(r.session_id), r.ts, str(r.item_id),
                        "purchase_complete", "/checkout", {"price": r.price, "quantity": r.quantity}
                    )

    def stream(self) -> Generator[InteractionModel, None, None]:
        """Yield all click interactions followed by all buy interactions."""
        yield from self.stream_clicks()
        yield from self.stream_buys()

    def _load_sessions(self, max_sessions: int | None = None) -> dict:
        """Group streamed interactions by session id, keeping at most `max_sessions` sessions.

        Once the cap is reached, rows of unseen sessions are skipped while rows
        of already-kept sessions are still collected. Each session's events are
        sorted by their `ts` value.
        """
        sessions = {}
        for interaction in self.stream():
            sid = interaction.value.payload.sessionId
            if sid not in sessions:
                if max_sessions and len(sessions) >= max_sessions: continue
                sessions[sid] = []
            sessions[sid].append(interaction)
        for sid in sessions: sessions[sid].sort(key=lambda x: x.value.payload.ts)
        return sessions

    def get_data(self) -> dict:
        """Return the {session_id: [interactions]} mapping loaded at construction."""
        return self.data

    def get_entries(self) -> tuple[list[str], int]:
        """Return the list of loaded session ids together with its length."""
        return self.entries, len(self.entries)
if __name__ == "__main__":
    # Smoke check: load a small sample and report how many events of each kind it contains.
    loader = YooChooseLoader(max_sessions=100)
    event_names = [
        evt.value.payload.eventName
        for session_events in loader.get_data().values()
        for evt in session_events
    ]
    views = event_names.count("view_item_page")
    purchases = event_names.count("purchase_complete")
    print(f"Loaded {len(loader.entries)} sessions: {views} view_item_page, {purchases} purchase_complete")

7
tests/e2e/.env.example Normal file
View File

@@ -0,0 +1,7 @@
WEB_URL=http://localhost:3000
BACKEND_URL=http://localhost:5000
PRICING_PROVIDER_URL=http://localhost:5001
AIRFLOW_URL=http://localhost:8085
AIRFLOW_USER=admin
AIRFLOW_PASS=admin
HEADLESS=true

View File

@@ -0,0 +1,61 @@
/** Base URL of the Airflow webserver; falls back to the local default when unset or empty. */
const AIRFLOW_URL = process.env.AIRFLOW_URL || 'http://localhost:8085';
/** Pre-computed HTTP Basic credentials header value built from AIRFLOW_USER / AIRFLOW_PASS. */
const AUTH = 'Basic ' + Buffer.from(`${process.env.AIRFLOW_USER || 'admin'}:${process.env.AIRFLOW_PASS || 'admin'}`).toString('base64');

/** Options accepted by `req`; headers must be a plain record because they are merged by spreading. */
type AirflowRequestOptions = {
  method?: string;
  headers?: Record<string, string>;
  body?: string;
};

/**
 * Issues an authenticated request against the Airflow REST API.
 *
 * @param path - Path appended verbatim to the Airflow base URL.
 * @param opts - Fetch options; caller headers are merged over the Authorization header.
 * @returns The pending `fetch` response promise.
 */
const req = (path: string, opts: AirflowRequestOptions = {}) => {
  const headers = { Authorization: AUTH, ...opts.headers };
  return fetch(`${AIRFLOW_URL}${path}`, { ...opts, headers });
};
/**
 * Starts a new run of the given DAG.
 *
 * @param dagId - Identifier of the DAG to trigger.
 * @param conf - Run configuration sent as the `conf` field of the request body.
 * @returns The `dag_run_id` reported by Airflow.
 * @throws Error when Airflow answers with a non-OK status.
 */
export const triggerDag = async (dagId: string, conf = {}) => {
  const payload = JSON.stringify({ conf });
  const response = await req(`/api/v1/dags/${dagId}/dagRuns`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: payload,
  });
  if (!response.ok) {
    throw new Error(`Trigger DAG failed: ${response.status}`);
  }
  const run = await response.json();
  return run.dag_run_id;
};
/**
 * Fetches the current state of a DAG run.
 *
 * @returns The `state` field of the run as reported by Airflow.
 * @throws Error when Airflow answers with a non-OK status.
 */
export const getDagStatus = async (dagId: string, runId: string) => {
  const response = await req(`/api/v1/dags/${dagId}/dagRuns/${runId}`);
  if (!response.ok) {
    throw new Error(`Get status failed: ${response.status}`);
  }
  const run = await response.json();
  return run.state;
};
/**
 * Marks a DAG run as failed. Best effort: a non-OK response is only logged
 * as a warning and never thrown.
 */
export const cancelDag = async (dagId: string, runId: string) => {
  const markFailed = JSON.stringify({ state: 'failed' });
  const response = await req(`/api/v1/dags/${dagId}/dagRuns/${runId}`, {
    method: 'PATCH',
    headers: { 'Content-Type': 'application/json' },
    body: markFailed,
  });
  if (response.ok) return;
  console.warn(`Failed to cancel DAG ${runId}: ${response.status}`);
};
/**
 * Polls a DAG run until it succeeds, fails or the time budget is exhausted.
 *
 * @param maxMs - Total time budget in milliseconds.
 * @param pollMs - Pause between two status checks in milliseconds.
 * @throws Error when the run reports `failed`, or on timeout (after the run
 *   has been asked to cancel).
 */
export const waitForDag = async (dagId: string, runId: string, maxMs = 30000, pollMs = 1000) => {
  const startedAt = Date.now();
  const pause = () => new Promise(resolve => setTimeout(resolve, pollMs));
  while (Date.now() - startedAt < maxMs) {
    switch (await getDagStatus(dagId, runId)) {
      case 'success':
        return;
      case 'failed':
        throw new Error(`DAG ${runId} failed`);
      default:
        // Any other state means the run is still in progress.
        await pause();
    }
  }
  await cancelDag(dagId, runId);
  throw new Error(`DAG ${runId} timeout`);
};
/**
 * Triggers a DAG and waits for the resulting run to finish.
 *
 * @param maxMs - Time budget in milliseconds passed on to `waitForDag`.
 */
export const runDag = async (dagId: string, conf = {}, maxMs = 60000) => {
  const startedRunId = await triggerDag(dagId, conf);
  await waitForDag(dagId, startedRunId, maxMs);
};
/** Runs the `session_pricing_pipeline` DAG for a store mode with a 90 s budget. */
export const runSessionPricing = (mode = 'hotel') => {
  const conf = { store_mode: mode, session_limit: 10 };
  return runDag('session_pricing_pipeline', conf, 90000);
};
/**
 * Runs the `surge_pricing_pipeline` DAG with a 90 s budget.
 *
 * @param highThresh - Value sent as `high_threshold`.
 * @param lowThresh - Value sent as `low_threshold`.
 */
export const runSurgePricing = (mode = 'hotel', highThresh = 10, lowThresh = 2) => {
  const conf = {
    store_mode: mode,
    high_threshold: highThresh,
    low_threshold: lowThresh,
    surge_multiplier: 1.2,
    discount_multiplier: 0.9,
  };
  return runDag('surge_pricing_pipeline', conf, 90000);
};

View File

@@ -9,8 +9,8 @@ interface InteractionEvent {
const dumpKafkaTopic = async (backendUrl: string, topic: string) => { const dumpKafkaTopic = async (backendUrl: string, topic: string) => {
const resp = await fetch(`${backendUrl}/api/kafka/dump?topic=${topic}`); const resp = await fetch(`${backendUrl}/api/kafka/dump?topic=${topic}`);
if (!resp.ok) throw new Error(`Kafka dump failed: ${resp.status}`); if (!resp.ok) throw new Error(`Kafka dump failed: ${resp.status}`);
const { messages = [] } = await resp.json(); const { data = [] } = await resp.json();
return messages as any[]; return data as any[];
}; };
export const waitForInteractionEvent = async ( export const waitForInteractionEvent = async (

View File

@@ -5,14 +5,14 @@ export default defineConfig({
fullyParallel: true, fullyParallel: true,
forbidOnly: !!process.env.CI, forbidOnly: !!process.env.CI,
retries: 0, retries: 0,
workers: 5, workers: 1,
reporter: 'list', reporter: 'list',
use: { use: {
baseURL: process.env.WEB_URL || 'http://localhost:3000', baseURL: process.env.WEB_URL || 'http://localhost:3000',
trace: 'retain-on-failure', trace: 'retain-on-failure',
screenshot: 'only-on-failure', screenshot: 'only-on-failure',
}, },
timeout: 60000, timeout: 180000,
expect: { expect: {
timeout: 10000, timeout: 10000,
}, },

View File

@@ -9,6 +9,7 @@ import {
addToCart, addToCart,
} from '../helpers/interactions'; } from '../helpers/interactions';
import { getSessionEvents } from '../helpers/kafka'; import { getSessionEvents } from '../helpers/kafka';
import { runSessionPricing } from '../helpers/airflow';
test.describe('SessionAwarePricer E2E', () => { test.describe('SessionAwarePricer E2E', () => {
const STORE_TYPE = 'hotel'; const STORE_TYPE = 'hotel';
@@ -23,6 +24,9 @@ test.describe('SessionAwarePricer E2E', () => {
await page.waitForTimeout(1500); await page.waitForTimeout(1500);
const productId2 = await humanLikeViewProduct(page, STORE_TYPE); const productId2 = await humanLikeViewProduct(page, STORE_TYPE);
await runSessionPricing(STORE_TYPE);
const secondPrice = await getPriceFromDOM(page); const secondPrice = await getPriceFromDOM(page);
expect(await verifySessionConsistency(page, sessionId)).toBeTruthy(); expect(await verifySessionConsistency(page, sessionId)).toBeTruthy();
@@ -40,11 +44,13 @@ test.describe('SessionAwarePricer E2E', () => {
await rapidViewProductViaFlow(page, 8, 100, STORE_TYPE); await rapidViewProductViaFlow(page, 8, 100, STORE_TYPE);
expect(await verifySessionConsistency(page, sessionId)).toBeTruthy(); expect(await verifySessionConsistency(page, sessionId)).toBeTruthy();
await page.waitForTimeout(2500); await page.waitForTimeout(1000);
const events = await getSessionEvents(backendUrl, sessionId); const events = await getSessionEvents(backendUrl, sessionId);
expect(events.length).toBeGreaterThanOrEqual(8); expect(events.length).toBeGreaterThanOrEqual(8);
await runSessionPricing(STORE_TYPE);
await page.goto(`/products/${productId}`); await page.goto(`/products/${productId}`);
await page.waitForLoadState('networkidle'); await page.waitForLoadState('networkidle');
const agentPrice = await getPriceFromDOM(page); const agentPrice = await getPriceFromDOM(page);
@@ -59,14 +65,12 @@ test.describe('SessionAwarePricer E2E', () => {
const productId = await viewProductViaFlow(page, STORE_TYPE); const productId = await viewProductViaFlow(page, STORE_TYPE);
const baselinePrice = await getPriceFromDOM(page); const baselinePrice = await getPriceFromDOM(page);
const startTime = Date.now();
await rapidViewProductViaFlow(page, 10, 80, STORE_TYPE); await rapidViewProductViaFlow(page, 10, 80, STORE_TYPE);
const duration = (Date.now() - startTime) / 1000;
const eventsPerSec = 10 / duration; const events = await getSessionEvents(backendUrl, sessionId);
expect(eventsPerSec).toBeGreaterThan(2.0); expect(events.length).toBeGreaterThanOrEqual(10);
await page.waitForTimeout(2000); await runSessionPricing(STORE_TYPE);
await page.goto(`/products/${productId}`); await page.goto(`/products/${productId}`);
await page.waitForLoadState('networkidle'); await page.waitForLoadState('networkidle');
@@ -105,8 +109,11 @@ test.describe('SessionAwarePricer E2E', () => {
await rapidViewProductViaFlow(page, 2, 150, STORE_TYPE); await rapidViewProductViaFlow(page, 2, 150, STORE_TYPE);
await page.waitForTimeout(1500); await page.waitForTimeout(1000);
await humanLikeViewProduct(page, STORE_TYPE); await humanLikeViewProduct(page, STORE_TYPE);
await runSessionPricing(STORE_TYPE);
const finalPrice = await getPriceFromDOM(page); const finalPrice = await getPriceFromDOM(page);
expect(Math.abs(finalPrice - baselinePrice) / baselinePrice).toBeLessThan(0.3); expect(Math.abs(finalPrice - baselinePrice) / baselinePrice).toBeLessThan(0.3);

View File

@@ -7,6 +7,7 @@ import {
verifySessionConsistency, verifySessionConsistency,
} from '../helpers/interactions'; } from '../helpers/interactions';
import { waitForInteractionEvent, countProductViews } from '../helpers/kafka'; import { waitForInteractionEvent, countProductViews } from '../helpers/kafka';
import { runSurgePricing } from '../helpers/airflow';
test.describe('SimpleSurgePricer E2E', () => { test.describe('SimpleSurgePricer E2E', () => {
const STORE_TYPE = 'hotel'; const STORE_TYPE = 'hotel';
@@ -29,7 +30,7 @@ test.describe('SimpleSurgePricer E2E', () => {
await rapidViewProductViaFlow(page, 5, 200, STORE_TYPE); await rapidViewProductViaFlow(page, 5, 200, STORE_TYPE);
await page.waitForTimeout(2000); await page.waitForTimeout(1000);
const evt = await waitForInteractionEvent(backendUrl, sessionId, 'view_item_page'); const evt = await waitForInteractionEvent(backendUrl, sessionId, 'view_item_page');
expect(evt).not.toBeNull(); expect(evt).not.toBeNull();
@@ -37,6 +38,8 @@ test.describe('SimpleSurgePricer E2E', () => {
const viewCount = await countProductViews(backendUrl, productId); const viewCount = await countProductViews(backendUrl, productId);
expect(viewCount).toBeGreaterThanOrEqual(5); expect(viewCount).toBeGreaterThanOrEqual(5);
await runSurgePricing(STORE_TYPE, 3, 1);
await page.goto(`/products/${productId}`); await page.goto(`/products/${productId}`);
await page.waitForLoadState('networkidle'); await page.waitForLoadState('networkidle');
const surgedPrice = await getPriceFromDOM(page); const surgedPrice = await getPriceFromDOM(page);
@@ -72,7 +75,9 @@ test.describe('SimpleSurgePricer E2E', () => {
await rapidViewProductViaFlow(page, 5, 150, STORE_TYPE); await rapidViewProductViaFlow(page, 5, 150, STORE_TYPE);
await page.waitForTimeout(1500); await page.waitForTimeout(1000);
await runSurgePricing(STORE_TYPE, 3, 1);
await page.goto(`/products/${productId}`); await page.goto(`/products/${productId}`);
await page.waitForLoadState('networkidle'); await page.waitForLoadState('networkidle');
@@ -81,6 +86,8 @@ test.describe('SimpleSurgePricer E2E', () => {
await page.waitForTimeout(12000); await page.waitForTimeout(12000);
await runSurgePricing(STORE_TYPE, 3, 1);
await page.goto(`/products/${productId}`); await page.goto(`/products/${productId}`);
await page.waitForLoadState('networkidle'); await page.waitForLoadState('networkidle');
const decayedPrice = await getPriceFromDOM(page); const decayedPrice = await getPriceFromDOM(page);

View File

@@ -30,6 +30,8 @@ export async function GET(req: NextRequest) {
const providerUrl = process.env.PRICING_PROVIDER_URL || 'http://localhost:5001'; const providerUrl = process.env.PRICING_PROVIDER_URL || 'http://localhost:5001';
try { try {
const queryParams = new URLSearchParams(); const queryParams = new URLSearchParams();
// THIS is our entry point into dynamic pricing: we reference the context of the session and experiment and ask for a price to assign to the trajectory that is being expressed.
// The whole pipeline gets triggered from here.
if (sessionId) queryParams.append('sessionId', sessionId); if (sessionId) queryParams.append('sessionId', sessionId);
if (experimentId) queryParams.append('experimentId', experimentId); if (experimentId) queryParams.append('experimentId', experimentId);
@@ -55,25 +57,26 @@ export async function GET(req: NextRequest) {
price = Math.round(randomBase * 100) / 100; price = Math.round(randomBase * 100) / 100;
} }
// log price to kafka for elasticity computation // log price to kafka asynchronously (non-blocking)
if (sessionId) { if (sessionId) {
const backendUrl = process.env.BACKEND_URL || 'http://localhost:5000'; const backendUrl = process.env.BACKEND_URL || 'http://localhost:5000';
try { // fire and forget - don't await to avoid blocking response
await fetch(`${backendUrl}/api/kafka/price-log`, { fetch(`${backendUrl}/api/kafka/price-log`, {
method: 'POST', method: 'POST',
headers: { 'Content-Type': 'application/json' }, headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ body: JSON.stringify({
productId, productId,
price, price,
sessionId, sessionId,
experimentId: experimentId || undefined, experimentId: experimentId || undefined,
storeMode, storeMode,
ts: timestamp, ts: timestamp,
}), }),
}); }).catch(err => {
} catch (err) { if (process.env.NODE_ENV === 'development') {
console.error('[price-log-error]', err); console.error('[price-log-error]', err);
} }
});
} }
if (process.env.NODE_ENV === 'development') { if (process.env.NODE_ENV === 'development') {

View File

@@ -32,7 +32,8 @@ export default function CartPage() {
{itemCount > 0 && ( {itemCount > 0 && (
<button <button
onClick={clearCart} onClick={clearCart}
className="text-sm text-red-600 hover:underline" className="text-sm hover:underline"
style={{ color: 'var(--accent-warning)' }}
> >
Clear cart Clear cart
</button> </button>
@@ -42,7 +43,7 @@ export default function CartPage() {
{itemCount === 0 ? ( {itemCount === 0 ? (
<div className="text-center py-12"> <div className="text-center py-12">
<p className="text-gray-500 mb-4">Your cart is empty</p> <p className="text-gray-500 mb-4">Your cart is empty</p>
<a href="/" className="text-blue-600 hover:underline">Browse our selection</a> <a href="/" className="hover:underline" style={{ color: 'var(--text-accent)' }}>Browse our selection</a>
</div> </div>
) : ( ) : (
<> <>
@@ -54,15 +55,11 @@ export default function CartPage() {
> >
<div className="flex-1"> <div className="flex-1">
<div className="flex items-center gap-2 mb-1"> <div className="flex items-center gap-2 mb-1">
<span className="px-2 py-0.5 text-xs font-medium rounded bg-blue-100 text-blue-800">
{item.type}
</span>
<h3 className="font-semibold">{item.name}</h3> <h3 className="font-semibold">{item.name}</h3>
</div> </div>
{item.type === 'hotel' && ( {item.type === 'hotel' && (
<div className="text-sm text-gray-600"> <div className="text-sm text-gray-600">
<p>{String(item.metadata.roomType)}</p>
<p>{String(item.metadata.checkIn)} - {String(item.metadata.checkOut)}</p> <p>{String(item.metadata.checkIn)} - {String(item.metadata.checkOut)}</p>
<p>{String(item.metadata.nights)} night{Number(item.metadata.nights) > 1 ? 's' : ''}</p> <p>{String(item.metadata.nights)} night{Number(item.metadata.nights) > 1 ? 's' : ''}</p>
</div> </div>
@@ -81,7 +78,8 @@ export default function CartPage() {
<p className="text-xl font-bold mb-2">${item.price}</p> <p className="text-xl font-bold mb-2">${item.price}</p>
<button <button
onClick={() => handleRemove(item.id, item.type)} onClick={() => handleRemove(item.id, item.type)}
className="text-sm text-red-600 hover:underline" className="text-sm hover:underline"
style={{ color: 'var(--accent-warning)' }}
> >
Remove Remove
</button> </button>
@@ -100,7 +98,7 @@ export default function CartPage() {
dispatchInteraction('checkout_start', undefined, { total, itemCount }); dispatchInteraction('checkout_start', undefined, { total, itemCount });
window.location.href = '/checkout'; window.location.href = '/checkout';
}} }}
className="w-full py-3 bg-blue-600 hover:bg-blue-700 text-white rounded-lg font-medium transition-colors" className="btn-primary w-full"
> >
Proceed to Checkout Proceed to Checkout
</button> </button>

View File

@@ -8,6 +8,9 @@
--bg-secondary: #f5f5f5; --bg-secondary: #f5f5f5;
--text-primary: #333333; --text-primary: #333333;
--text-secondary: #666666; --text-secondary: #666666;
--accent-primary: #007aff;
--accent-primary-hover: #0051d5;
--accent-primary-light: #e6f2ff;
--spacing-sm: 8px; --spacing-sm: 8px;
--spacing-md: 16px; --spacing-md: 16px;
--spacing-lg: 32px; --spacing-lg: 32px;

View File

@@ -15,8 +15,8 @@ const geistMono = Geist_Mono({
}); });
export const metadata: Metadata = { export const metadata: Metadata = {
title: "Create Next App", title: "Travel Booking Platform",
description: "Generated by create next app", description: "Book flights and hotels with dynamic pricing",
}; };
export default function RootLayout({ export default function RootLayout({

View File

@@ -1,65 +1,5 @@
import Image from "next/image"; import { redirect } from 'next/navigation';
export default function Home() { export default function Home() {
return ( redirect('/hotel');
<div className="flex min-h-screen items-center justify-center bg-zinc-50 font-sans dark:bg-black">
<main className="flex min-h-screen w-full max-w-3xl flex-col items-center justify-between py-32 px-16 bg-white dark:bg-black sm:items-start">
<Image
className="dark:invert"
src="/next.svg"
alt="Next.js logo"
width={100}
height={20}
priority
/>
<div className="flex flex-col items-center gap-6 text-center sm:items-start sm:text-left">
<h1 className="max-w-xs text-3xl font-semibold leading-10 tracking-tight text-black dark:text-zinc-50">
To get started, edit the page.tsx file.
</h1>
<p className="max-w-md text-lg leading-8 text-zinc-600 dark:text-zinc-400">
Looking for a starting point or more instructions? Head over to{" "}
<a
href="https://vercel.com/templates?framework=next.js&utm_source=create-next-app&utm_medium=appdir-template-tw&utm_campaign=create-next-app"
className="font-medium text-zinc-950 dark:text-zinc-50"
>
Templates
</a>{" "}
or the{" "}
<a
href="https://nextjs.org/learn?utm_source=create-next-app&utm_medium=appdir-template-tw&utm_campaign=create-next-app"
className="font-medium text-zinc-950 dark:text-zinc-50"
>
Learning
</a>{" "}
center.
</p>
</div>
<div className="flex flex-col gap-4 text-base font-medium sm:flex-row">
<a
className="flex h-12 w-full items-center justify-center gap-2 rounded-full bg-foreground px-5 text-background transition-colors hover:bg-[#383838] dark:hover:bg-[#ccc] md:w-[158px]"
href="https://vercel.com/new?utm_source=create-next-app&utm_medium=appdir-template-tw&utm_campaign=create-next-app"
target="_blank"
rel="noopener noreferrer"
>
<Image
className="dark:invert"
src="/vercel.svg"
alt="Vercel logomark"
width={16}
height={16}
/>
Deploy Now
</a>
<a
className="flex h-12 w-full items-center justify-center rounded-full border border-solid border-black/[.08] px-5 transition-colors hover:border-transparent hover:bg-black/[.04] dark:border-white/[.145] dark:hover:bg-[#1a1a1a] md:w-[158px]"
href="https://nextjs.org/docs?utm_source=create-next-app&utm_medium=appdir-template-tw&utm_campaign=create-next-app"
target="_blank"
rel="noopener noreferrer"
>
Documentation
</a>
</div>
</main>
</div>
);
} }

View File

@@ -2,6 +2,7 @@
import type { EventName } from '@/lib/events'; import type { EventName } from '@/lib/events';
import type { Hotel } from '@/lib/hotel-utils'; import type { Hotel } from '@/lib/hotel-utils';
import { getHotelImageUrl } from '@/lib/hotel-utils';
import { useHoverTracking } from '@/hooks/useHoverTracking'; import { useHoverTracking } from '@/hooks/useHoverTracking';
import PriceDisplay from '@/components/ui/PriceDisplay'; import PriceDisplay from '@/components/ui/PriceDisplay';
@@ -47,8 +48,6 @@ export default function HotelCard({ hotel }: { hotel: Hotel }) {
window.location.href = `/hotel/products/${hotel.id}`; window.location.href = `/hotel/products/${hotel.id}`;
}; };
const imageUrl = `https://images.unsplash.com/photo-1551882547-ff40c63fe5fa?w=400&h=300&fit=crop`;
return ( return (
<div <div
className="hotel-card cursor-pointer" className="hotel-card cursor-pointer"
@@ -56,7 +55,7 @@ export default function HotelCard({ hotel }: { hotel: Hotel }) {
> >
<div className="hotel-image relative overflow-hidden"> <div className="hotel-image relative overflow-hidden">
<img <img
src={imageUrl} src={getHotelImageUrl(hotel.id, { w: 400, h: 300 })}
alt={hotel.name} alt={hotel.name}
className="w-full h-full object-cover" className="w-full h-full object-cover"
onError={(e) => { onError={(e) => {

View File

@@ -2,6 +2,7 @@
import { useState, useEffect } from 'react'; import { useState, useEffect } from 'react';
import type { Hotel } from '@/lib/hotel-utils'; import type { Hotel } from '@/lib/hotel-utils';
import { getHotelImageUrl } from '@/lib/hotel-utils';
import PriceDisplay from '@/components/ui/PriceDisplay'; import PriceDisplay from '@/components/ui/PriceDisplay';
interface HotelDetailsProps { interface HotelDetailsProps {
@@ -43,13 +44,11 @@ const PriceTotalDisplay = ({ productId, nights }: { productId: string; nights: n
}; };
export default function HotelDetails({ product, onAddToCart, addedToCart }: HotelDetailsProps) { export default function HotelDetails({ product, onAddToCart, addedToCart }: HotelDetailsProps) {
const imageUrl = `https://images.unsplash.com/photo-1566073771259-6a8506099945?w=800&h=600&fit=crop`;
return ( return (
<div className="w-full flex flex-col lg:flex-row gap-12 py-8"> <div className="w-full flex flex-col lg:flex-row gap-12 py-8">
<div className="w-full lg:w-1/2 rounded-lg aspect-[4/3] overflow-hidden shrink-0"> <div className="w-full lg:w-1/2 rounded-lg aspect-[4/3] overflow-hidden shrink-0">
<img <img
src={imageUrl} src={getHotelImageUrl(product.id, { w: 800, h: 600 })}
alt={product.name} alt={product.name}
className="w-full h-full object-cover" className="w-full h-full object-cover"
onError={(e) => { onError={(e) => {

View File

@@ -20,7 +20,7 @@ const NavLink = ({ href, children }: { href: string; children: React.ReactNode }
href={href} href={href}
className={`px-4 py-2 rounded-md transition-colors ${ className={`px-4 py-2 rounded-md transition-colors ${
isActive isActive
? 'bg-[var(--accent-primary)] font-semibold' ? 'bg-[var(--accent-primary)] text-white font-semibold'
: 'hover:bg-[var(--accent-primary-light)] text-[var(--text-primary)]' : 'hover:bg-[var(--accent-primary-light)] text-[var(--text-primary)]'
}`} }`}
> >

View File

@@ -31,7 +31,7 @@ export interface Flight {
availability: number; availability: number;
} }
const EPOCH = new Date(0); import { dateToDaysFromToday, dateToIndex, todayIndex } from './date-utils';
export const transformProduct = (p: AirlineProduct): Flight => { export const transformProduct = (p: AirlineProduct): Flight => {
const { id, flight_type, date_index, metadata, availability } = p; const { id, flight_type, date_index, metadata, availability } = p;
@@ -52,24 +52,4 @@ export const transformProduct = (p: AirlineProduct): Flight => {
}; };
}; };
// convert date string to days from today export { dateToDaysFromToday, dateToIndex, todayIndex };
export const dateToDaysFromToday = (dateStr: string): number => {
const target = new Date(dateStr);
target.setHours(0, 0, 0, 0);
const today = new Date();
today.setHours(0, 0, 0, 0);
return Math.floor((target.getTime() - today.getTime()) / 86400000);
};
// convert date string to date_index (days since epoch)
export const dateToIndex = (dateStr: string): number => {
const d = new Date(dateStr);
return Math.floor((d.getTime() - EPOCH.getTime()) / 86400000);
};
// get current date_index
export const todayIndex = (): number => {
const now = new Date();
now.setHours(0, 0, 0, 0);
return Math.floor((now.getTime() - EPOCH.getTime()) / 86400000);
};

23
web/src/lib/date-utils.ts Normal file
View File

@@ -0,0 +1,23 @@
/** Unix epoch (1970-01-01T00:00:00Z); the origin that day indices are counted from. */
const EPOCH = new Date(0);
/** Number of milliseconds in one 24-hour day. */
const MS_PER_DAY = 86400000;

/** Matches a bare ISO calendar date such as `2026-01-31` (no time component). */
const ISO_DATE_ONLY = /^(\d{4})-(\d{2})-(\d{2})$/;

/**
 * Resolves a date string to local midnight of the calendar day it names.
 *
 * A bare `YYYY-MM-DD` string is parsed by `new Date()` as UTC midnight, which
 * falls on the previous local day in time zones west of UTC. Such strings are
 * therefore built from their components in local time instead. Anything else
 * (or a date-only string that does not round-trip, e.g. an out-of-range
 * month) goes through `new Date()` and is truncated to local midnight.
 */
const localMidnight = (dateStr: string): Date => {
  const parts = ISO_DATE_ONLY.exec(dateStr);
  if (parts) {
    const year = Number(parts[1]);
    const month = Number(parts[2]);
    const day = Number(parts[3]);
    const local = new Date(year, month - 1, day);
    const roundTrips =
      local.getFullYear() === year &&
      local.getMonth() === month - 1 &&
      local.getDate() === day;
    if (roundTrips) return local;
  }
  const parsed = new Date(dateStr);
  parsed.setHours(0, 0, 0, 0);
  return parsed;
};

/**
 * Number of whole calendar days from today (local time) to the given date.
 *
 * @param dateStr - A date string; bare `YYYY-MM-DD` values are read as local calendar dates.
 * @returns Positive for future dates, negative for past dates, `0` for today,
 *          and `NaN` when the string cannot be parsed.
 */
export const dateToDaysFromToday = (dateStr: string): number => {
  const target = localMidnight(dateStr);
  const today = new Date();
  today.setHours(0, 0, 0, 0);
  // Two local midnights can be 23 or 25 hours apart across a DST change, so
  // round to the nearest whole day instead of flooring.
  return Math.round((target.getTime() - today.getTime()) / MS_PER_DAY);
};

/**
 * Converts a date string to a day index (whole days since the Unix epoch).
 *
 * @param dateStr - A date string understood by `new Date()`.
 * @returns The day index, or `NaN` when the string cannot be parsed.
 */
export const dateToIndex = (dateStr: string): number => {
  const d = new Date(dateStr);
  return Math.floor((d.getTime() - EPOCH.getTime()) / MS_PER_DAY);
};

/**
 * Day index (whole days since the Unix epoch) of today's local calendar date.
 *
 * The local year/month/day are projected onto UTC before dividing, so the
 * result matches `dateToIndex` for today's `YYYY-MM-DD` string in every time
 * zone. Flooring the local-midnight timestamp directly yields the previous
 * day's index in zones east of UTC.
 */
export const todayIndex = (): number => {
  const now = new Date();
  const utcMidnight = Date.UTC(now.getFullYear(), now.getMonth(), now.getDate());
  return Math.floor((utcMidnight - EPOCH.getTime()) / MS_PER_DAY);
};

export { EPOCH, MS_PER_DAY };

View File

@@ -25,7 +25,7 @@ export interface Hotel {
nights: number; nights: number;
} }
const EPOCH = new Date(0); import { EPOCH, MS_PER_DAY, dateToDaysFromToday, dateToIndex, todayIndex } from './date-utils';
export const transformProduct = (p: HotelProduct): Hotel => { export const transformProduct = (p: HotelProduct): Hotel => {
const { id, room_type, date_index, metadata } = p; const { id, room_type, date_index, metadata } = p;
@@ -37,14 +37,14 @@ export const transformProduct = (p: HotelProduct): Hotel => {
// legacy: treat as offset from today // legacy: treat as offset from today
const today = new Date(); const today = new Date();
today.setHours(0, 0, 0, 0); today.setHours(0, 0, 0, 0);
checkIn = new Date(today.getTime() + date_index * 86400000); checkIn = new Date(today.getTime() + date_index * MS_PER_DAY);
} else { } else {
// proper: days since epoch // proper: days since epoch
checkIn = new Date(EPOCH.getTime() + date_index * 86400000); checkIn = new Date(EPOCH.getTime() + date_index * MS_PER_DAY);
} }
const nights = 1; const nights = 1;
const checkOut = new Date(checkIn.getTime() + nights * 86400000); const checkOut = new Date(checkIn.getTime() + nights * MS_PER_DAY);
const formatOpts: Intl.DateTimeFormatOptions = { const formatOpts: Intl.DateTimeFormatOptions = {
month: 'short', month: 'short',
@@ -65,24 +65,34 @@ export const transformProduct = (p: HotelProduct): Hotel => {
}; };
}; };
// convert date string to days from today const hotelImagePool = [
export const dateToDaysFromToday = (dateStr: string): number => { 'photo-1566073771259-6a8506099945',
const target = new Date(dateStr); 'photo-1551882547-ff40c63fe5fa',
target.setHours(0, 0, 0, 0); 'photo-1590490360182-c33d57733427',
const today = new Date(); 'photo-1582719478250-c89cae4dc85b',
today.setHours(0, 0, 0, 0); 'photo-1596701062351-8c2c14d1fdd0',
return Math.floor((target.getTime() - today.getTime()) / 86400000); 'photo-1631049307264-da0ec9d70304',
'photo-1578683010236-d716f9a3f461',
'photo-1540518614846-7eded433c457',
'photo-1505693416388-ac5ce068fe85',
'photo-1522771739844-6a9f6d5f14af',
'photo-1562438668-bcf0ca6578f0',
'photo-1595576508898-0ad5c879a061',
];
/**
 * Hashes a string to a non-negative number.
 *
 * Folds each UTF-16 code unit into a running value with multiplier 31,
 * wrapping to a signed 32-bit integer after every step, and returns the
 * absolute value of the final result. The empty string hashes to 0.
 */
const hashString = (s: string): number => {
  let acc = 0;
  let pos = 0;
  while (pos < s.length) {
    // `| 0` truncates to signed 32 bits; acc * 31 stays well inside the exact double range.
    acc = (acc * 31 + s.charCodeAt(pos)) | 0;
    pos += 1;
  }
  return acc < 0 ? -acc : acc;
};
// convert date string to date_index (days since epoch) export const getHotelImageUrl = (hotelId: string, size: { w: number; h: number } = { w: 400, h: 300 }): string => {
export const dateToIndex = (dateStr: string): number => { const idx = hashString(hotelId) % hotelImagePool.length;
const d = new Date(dateStr); const photoId = hotelImagePool[idx];
return Math.floor((d.getTime() - EPOCH.getTime()) / 86400000); return `https://images.unsplash.com/${photoId}?w=${size.w}&h=${size.h}&fit=crop`;
}; };
// get current date_index export { dateToDaysFromToday, dateToIndex, todayIndex };
export const todayIndex = (): number => {
const now = new Date();
now.setHours(0, 0, 0, 0);
return Math.floor((now.getTime() - EPOCH.getTime()) / 86400000);
};