mirror of
https://github.com/velocitatem/PHANTOM.git
synced 2026-05-31 08:33:36 +00:00
chor> competitive wrapping
This commit is contained in:
@@ -60,7 +60,25 @@ class MarketEngine:
|
|||||||
]
|
]
|
||||||
# store trajectories for agent probability calculation
|
# store trajectories for agent probability calculation
|
||||||
self.last_trajectories = human_t + agent_t
|
self.last_trajectories = human_t + agent_t
|
||||||
return estimate_demand(self.last_trajectories, self.action_weights)
|
|
||||||
|
demand_proxy = estimate_demand(
|
||||||
|
self.last_trajectories,
|
||||||
|
self.action_weights,
|
||||||
|
normalize=True,
|
||||||
|
per_session=False,
|
||||||
|
)
|
||||||
|
raw_mix = ((1.0 - float(self.alpha)) * demand_h) + (
|
||||||
|
float(self.alpha) * demand_a
|
||||||
|
)
|
||||||
|
total_raw_demand = float(np.sum(raw_mix))
|
||||||
|
if not demand_proxy:
|
||||||
|
return {i: float(raw_mix[i]) for i in range(len(prices))}
|
||||||
|
if total_raw_demand <= 0.0:
|
||||||
|
return {i: 0.0 for i in range(len(prices))}
|
||||||
|
return {
|
||||||
|
i: total_raw_demand * float(demand_proxy.get(i, 0.0)) / 100.0
|
||||||
|
for i in range(len(prices))
|
||||||
|
}
|
||||||
|
|
||||||
def measure(self):
|
def measure(self):
|
||||||
pass
|
pass
|
||||||
|
|||||||
@@ -130,6 +130,13 @@ class PHANTOM(gym.Env):
|
|||||||
self._initial_episode_prices = None
|
self._initial_episode_prices = None
|
||||||
self._trajectories = [] # session trajectories for agent prob calculation
|
self._trajectories = [] # session trajectories for agent prob calculation
|
||||||
self.baseline_prices = np.full(self.n_products, self.price_bounds[0])
|
self.baseline_prices = np.full(self.n_products, self.price_bounds[0])
|
||||||
|
self.anchor_prices = np.full(
|
||||||
|
self.n_products,
|
||||||
|
float(np.clip(float(self.human_params[0]), *self.price_bounds)),
|
||||||
|
)
|
||||||
|
self.competitive_cap = float(
|
||||||
|
min(self.price_bounds[1], float(np.mean(self.anchor_prices)) * 1.15)
|
||||||
|
)
|
||||||
self._low_margin_streak = 0 # consecutive steps below margin_floor
|
self._low_margin_streak = 0 # consecutive steps below margin_floor
|
||||||
self._last_agent_prob = float(self.alpha)
|
self._last_agent_prob = float(self.alpha)
|
||||||
self._last_alpha_adv = float(self.alpha)
|
self._last_alpha_adv = float(self.alpha)
|
||||||
@@ -169,19 +176,28 @@ class PHANTOM(gym.Env):
|
|||||||
self.market.Nhumans = self.N - n_agents
|
self.market.Nhumans = self.N - n_agents
|
||||||
|
|
||||||
def _decode_action(self, action) -> np.ndarray:
|
def _decode_action(self, action) -> np.ndarray:
|
||||||
base = (
|
prev = self._prices
|
||||||
self._prices
|
base = self.anchor_prices
|
||||||
if self._prices is not None
|
|
||||||
else np.full(self.n_products, self.price_bounds[0], dtype=float)
|
def _blend(target: np.ndarray) -> np.ndarray:
|
||||||
)
|
if prev is None:
|
||||||
|
lower = float(self.price_bounds[0])
|
||||||
|
return np.clip(target, lower, self.competitive_cap)
|
||||||
|
blended = 0.75 * np.asarray(prev, dtype=float) + 0.25 * target
|
||||||
|
lower = float(self.price_bounds[0])
|
||||||
|
return np.clip(blended, lower, self.competitive_cap)
|
||||||
|
|
||||||
if np.isscalar(action):
|
if np.isscalar(action):
|
||||||
idx = int(np.clip(int(action), 0, self.action_levels - 1))
|
idx = int(np.clip(int(action), 0, self.action_levels - 1))
|
||||||
return np.clip(base * self._action_scales[idx], *self.price_bounds)
|
target = base * self._action_scales[idx]
|
||||||
|
return _blend(target)
|
||||||
a = np.asarray(action)
|
a = np.asarray(action)
|
||||||
if a.size == 1:
|
if a.size == 1:
|
||||||
idx = int(np.clip(int(a.reshape(-1)[0]), 0, self.action_levels - 1))
|
idx = int(np.clip(int(a.reshape(-1)[0]), 0, self.action_levels - 1))
|
||||||
return np.clip(base * self._action_scales[idx], *self.price_bounds)
|
target = base * self._action_scales[idx]
|
||||||
return np.clip(a.astype(float), *self.price_bounds)
|
return _blend(target)
|
||||||
|
lower = float(self.price_bounds[0])
|
||||||
|
return np.clip(a.astype(float), lower, self.competitive_cap)
|
||||||
|
|
||||||
def _compute_agent_prob(self, trajectories=None) -> float:
|
def _compute_agent_prob(self, trajectories=None) -> float:
|
||||||
trajectories = (
|
trajectories = (
|
||||||
@@ -225,14 +241,10 @@ class PHANTOM(gym.Env):
|
|||||||
upward_volatility = 0.0
|
upward_volatility = 0.0
|
||||||
ux_penalty = self.eta_ux * info_budget * (volatility + 0.5 * upward_volatility)
|
ux_penalty = self.eta_ux * info_budget * (volatility + 0.5 * upward_volatility)
|
||||||
|
|
||||||
competitive_anchor = float(
|
competitive_anchor = float(np.mean(self.anchor_prices))
|
||||||
np.clip(float(self.human_params[0]) * 1.2, *self.price_bounds)
|
|
||||||
)
|
|
||||||
price_ratio = prices / max(competitive_anchor, 1.0)
|
price_ratio = prices / max(competitive_anchor, 1.0)
|
||||||
supra_excess = np.clip(price_ratio - 1.0, 0.0, None)
|
supra_excess = np.clip(price_ratio - 1.15, 0.0, None)
|
||||||
supra_penalty = (
|
supra_penalty = 4.0 * info_budget * float(np.mean(np.square(supra_excess)))
|
||||||
0.5 * self.eta_ux * info_budget * float(np.mean(np.square(supra_excess)))
|
|
||||||
)
|
|
||||||
supra_share = float(np.mean(supra_excess > 0.0))
|
supra_share = float(np.mean(supra_excess > 0.0))
|
||||||
|
|
||||||
reward_revenue = self.reward_profit_weight * profit
|
reward_revenue = self.reward_profit_weight * profit
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ pandas
|
|||||||
jupyter
|
jupyter
|
||||||
ipykernel
|
ipykernel
|
||||||
matplotlib
|
matplotlib
|
||||||
|
tikzplotlib
|
||||||
graphviz
|
graphviz
|
||||||
browser-use
|
browser-use
|
||||||
pytest
|
pytest
|
||||||
|
|||||||
Reference in New Issue
Block a user