mirror of
https://github.com/velocitatem/PHANTOM.git
synced 2026-05-31 16:43:36 +00:00
feat: fixing alignment w premiums and specific extraction of data
This commit is contained in:
@@ -7,7 +7,7 @@ from types import SimpleNamespace
|
|||||||
from typing import Optional, Dict, Any, List, Tuple
|
from typing import Optional, Dict, Any, List, Tuple
|
||||||
|
|
||||||
from lib.separability import load_artifacts, score_session, estimate_alpha
|
from lib.separability import load_artifacts, score_session, estimate_alpha
|
||||||
from sim.rl.behavior_loader.models import AgentBehaviorModel, BehaviorModel
|
from sim.rl.behavior_loader.models import AgentBehaviorModel, BehaviorModel, aggregate_event_transitions
|
||||||
|
|
||||||
# "learner" agent learning to optimize pricing
|
# "learner" agent learning to optimize pricing
|
||||||
# "agent" part of environment creating demand signals that learner processes
|
# "agent" part of environment creating demand signals that learner processes
|
||||||
@@ -52,8 +52,8 @@ EVENT_PAGE_MAP = {
|
|||||||
|
|
||||||
|
|
||||||
class BehavioralProfile:
|
class BehavioralProfile:
|
||||||
"""Synthetic Markov profile used to generate interaction sessions."""
|
"""Synthetic Markov profile used to generate interaction sessions.
|
||||||
# TODO: a lot of this is duplicated from models.py - refactor to share code better
|
Uses aggregate_event_transitions from models.py to build transition kernels from real data."""
|
||||||
|
|
||||||
def __init__(self, actor: str, purchase_probs: np.ndarray):
|
def __init__(self, actor: str, purchase_probs: np.ndarray):
|
||||||
self.actor = actor
|
self.actor = actor
|
||||||
@@ -66,11 +66,31 @@ class BehavioralProfile:
|
|||||||
"purchase_complete",
|
"purchase_complete",
|
||||||
"session_end",
|
"session_end",
|
||||||
]
|
]
|
||||||
# base transition structure (human default)
|
|
||||||
self.transitions : Dict[str, Dict[str, float]];
|
|
||||||
|
|
||||||
model = AgentBehaviorModel(agent_dir) if actor == "agents" else BehaviorModel(human_dir)
|
model = AgentBehaviorModel(agent_dir) if actor == "agents" else BehaviorModel(human_dir)
|
||||||
self.transitions = # TODO similarly to model.build_MDP_event_transitions() in models.py buidl the dict
|
mdp = model.build_MDP()
|
||||||
|
self.transitions = aggregate_event_transitions(mdp) if mdp.get("transitions") else self._fallback_transitions()
|
||||||
|
self.dwell_params = self._extract_dwell_params(mdp)
|
||||||
|
|
||||||
|
def _fallback_transitions(self) -> Dict[str, Dict[str, float]]:
|
||||||
|
# sensible defaults if no data available
|
||||||
|
return {
|
||||||
|
"session_start": {"view_item_page": 0.85, "session_end": 0.15},
|
||||||
|
"view_item_page": {"learn_more_about_item": 0.4, "add_item_to_cart": 0.3, "view_item_page": 0.2, "session_end": 0.1},
|
||||||
|
"learn_more_about_item": {"add_item_to_cart": 0.5, "view_item_page": 0.3, "session_end": 0.2},
|
||||||
|
"add_item_to_cart": {"purchase_complete": 0.6, "view_item_page": 0.25, "session_end": 0.15},
|
||||||
|
"purchase_complete": {"session_end": 1.0},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _extract_dwell_params(self, mdp: Dict) -> Dict[str, Tuple[float, float]]:
|
||||||
|
# derive gamma params (shape, scale) from state_rewards which encode temporal progression
|
||||||
|
state_vals = mdp.get("state_values", {})
|
||||||
|
params = {}
|
||||||
|
for state in self.states:
|
||||||
|
val = state_vals.get(state, 0.5)
|
||||||
|
shape = 1.5 + val * 2.0 # higher progression -> longer dwell
|
||||||
|
scale = 0.8 + (1.0 - val) * 1.2
|
||||||
|
params[state] = (shape, scale)
|
||||||
|
return params
|
||||||
|
|
||||||
def _transition_probs(self, state: str, product_idx: int) -> Dict[str, float]:
|
def _transition_probs(self, state: str, product_idx: int) -> Dict[str, float]:
|
||||||
probs = dict(self.transitions.get(state, {"session_end": 1.0}))
|
probs = dict(self.transitions.get(state, {"session_end": 1.0}))
|
||||||
@@ -100,11 +120,7 @@ class BehavioralProfile:
|
|||||||
prices: np.ndarray,
|
prices: np.ndarray,
|
||||||
unit_cost: np.ndarray,
|
unit_cost: np.ndarray,
|
||||||
) -> Tuple[List[Dict[str, Any]], List[SimpleNamespace]]:
|
) -> Tuple[List[Dict[str, Any]], List[SimpleNamespace]]:
|
||||||
"""Generate a single session trajectory."""
|
"""Generate a single session trajectory respecting business constraints."""
|
||||||
# TODO: this is similar to the sample trajectory method in models.
|
|
||||||
# we also have to respect business constraints which constrain the Lipschitz continuity of the transitions and prices
|
|
||||||
# we must apply constraints on purchases so the platform cannot offer prices below the cost of a product
|
|
||||||
|
|
||||||
events: List[Dict[str, Any]] = []
|
events: List[Dict[str, Any]] = []
|
||||||
feature_events: List[SimpleNamespace] = []
|
feature_events: List[SimpleNamespace] = []
|
||||||
state = "session_start"
|
state = "session_start"
|
||||||
@@ -112,25 +128,30 @@ class BehavioralProfile:
|
|||||||
product_idx = int(rng.integers(0, len(prices)))
|
product_idx = int(rng.integers(0, len(prices)))
|
||||||
product_id = f"product-{product_idx:04d}"
|
product_id = f"product-{product_idx:04d}"
|
||||||
|
|
||||||
|
|
||||||
|
# enforce price >= cost constraint (lipschitz bound on pricing)
|
||||||
|
# This is a last resort to keep a pricing learner from going rogue
|
||||||
|
cost = float(unit_cost[product_idx])
|
||||||
|
constrained_price = max(float(prices[product_idx]), cost * 1.05) # 5% min margin
|
||||||
|
|
||||||
while state != "session_end" and len(events) < 40:
|
while state != "session_end" and len(events) < 40:
|
||||||
if state != "session_start":
|
if state != "session_start":
|
||||||
price = float(prices[product_idx])
|
|
||||||
row = {
|
row = {
|
||||||
"session_id": session_id,
|
"session_id": session_id,
|
||||||
"actor": "agent" if self.actor == "agents" else "human",
|
"actor": "agent" if self.actor == "agents" else "human",
|
||||||
"eventName": state,
|
"eventName": state,
|
||||||
"product_idx": product_idx,
|
"product_idx": product_idx,
|
||||||
"productId": product_id,
|
"productId": product_id,
|
||||||
"price_offered": price,
|
"price_offered": constrained_price,
|
||||||
"price_paid": 0.0,
|
"price_paid": 0.0,
|
||||||
"page": EVENT_PAGE_MAP.get(state, "/"),
|
"page": EVENT_PAGE_MAP.get(state, "/"),
|
||||||
"ts": t,
|
"ts": t,
|
||||||
"unit_cost": float(unit_cost[product_idx]),
|
"unit_cost": cost,
|
||||||
"base_price": float(prices[product_idx]),
|
"base_price": float(prices[product_idx]),
|
||||||
}
|
}
|
||||||
if state == "purchase_complete":
|
if state == "purchase_complete":
|
||||||
noise = float(rng.normal(0.0, 0.015))
|
noise = float(rng.normal(0.0, 0.015))
|
||||||
row["price_paid"] = max(price * (1.0 + noise), row["unit_cost"])
|
row["price_paid"] = max(constrained_price * (1.0 + noise), cost)
|
||||||
events.append(row)
|
events.append(row)
|
||||||
feature_events.append(
|
feature_events.append(
|
||||||
SimpleNamespace(
|
SimpleNamespace(
|
||||||
@@ -143,7 +164,8 @@ class BehavioralProfile:
|
|||||||
|
|
||||||
transitions = self._transition_probs(state, product_idx)
|
transitions = self._transition_probs(state, product_idx)
|
||||||
next_state = rng.choice(list(transitions.keys()), p=list(transitions.values()))
|
next_state = rng.choice(list(transitions.keys()), p=list(transitions.values()))
|
||||||
dwell = max(0.5, rng.gamma(shape=2.0, scale=1.0)) # TODO: should use params from the profile data
|
shape, scale = self.dwell_params.get(state, (2.0, 1.0))
|
||||||
|
dwell = max(0.3, rng.gamma(shape=shape, scale=scale))
|
||||||
t += dwell
|
t += dwell
|
||||||
state = next_state
|
state = next_state
|
||||||
|
|
||||||
@@ -287,11 +309,13 @@ class CommercePlatform:
|
|||||||
|
|
||||||
human_prices = human_purchases["price_offered"] if not human_purchases.empty else pd.Series(dtype=float)
|
human_prices = human_purchases["price_offered"] if not human_purchases.empty else pd.Series(dtype=float)
|
||||||
human_costs = human_purchases["unit_cost"] if not human_purchases.empty else pd.Series(dtype=float)
|
human_costs = human_purchases["unit_cost"] if not human_purchases.empty else pd.Series(dtype=float)
|
||||||
|
human_base = human_purchases["base_price"] if not human_purchases.empty else pd.Series(dtype=float)
|
||||||
coi = 0.0
|
coi = 0.0
|
||||||
if not human_prices.empty and not human_costs.empty:
|
if not human_prices.empty and not human_costs.empty:
|
||||||
# of the purchased items, what is the margin between the price and cost
|
# COI = E[P] - p_min where p_min is cost, accounting for expected premium (base - realized)
|
||||||
# TODO: this should take into account the expected price we could have charged also
|
margin = human_prices.mean() - human_costs.mean()
|
||||||
coi = float(np.maximum(0.0, human_prices.mean() - human_costs.mean()))
|
expected_premium = human_base.mean() - human_prices.mean() if not human_base.empty else 0.0
|
||||||
|
coi = float(np.maximum(0.0, margin - expected_premium * 0.5))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"revenue_observed": revenue_observed,
|
"revenue_observed": revenue_observed,
|
||||||
@@ -302,6 +326,7 @@ class CommercePlatform:
|
|||||||
"mean_sale_price": mean_sale_price,
|
"mean_sale_price": mean_sale_price,
|
||||||
"look_to_book": look_to_book,
|
"look_to_book": look_to_book,
|
||||||
"coi": coi,
|
"coi": coi,
|
||||||
|
"expected_premium": float(expected_premium) if not human_base.empty else 0.0,
|
||||||
}
|
}
|
||||||
|
|
||||||
def _session_feature_table(self, df: pd.DataFrame) -> pd.DataFrame:
|
def _session_feature_table(self, df: pd.DataFrame) -> pd.DataFrame:
|
||||||
|
|||||||
Reference in New Issue
Block a user