mirror of
https://github.com/velocitatem/PHANTOM.git
synced 2026-05-31 16:43:36 +00:00
fix: supra reward adjustment and sweep
This commit is contained in:
@@ -54,6 +54,9 @@ def _evaluate_env(agent: Any, env: Any, episodes: int) -> dict[str, float]:
|
|||||||
coi_levels: list[float] = []
|
coi_levels: list[float] = []
|
||||||
coi_leakages: list[float] = []
|
coi_leakages: list[float] = []
|
||||||
volatilities: list[float] = []
|
volatilities: list[float] = []
|
||||||
|
upward_volatilities: list[float] = []
|
||||||
|
supra_shares: list[float] = []
|
||||||
|
supra_penalties: list[float] = []
|
||||||
agent_probs: list[float] = []
|
agent_probs: list[float] = []
|
||||||
|
|
||||||
for _ in range(int(episodes)):
|
for _ in range(int(episodes)):
|
||||||
@@ -65,6 +68,9 @@ def _evaluate_env(agent: Any, env: Any, episodes: int) -> dict[str, float]:
|
|||||||
ep_coi = 0.0
|
ep_coi = 0.0
|
||||||
ep_coi_leakage = 0.0
|
ep_coi_leakage = 0.0
|
||||||
ep_volatility = 0.0
|
ep_volatility = 0.0
|
||||||
|
ep_upward_volatility = 0.0
|
||||||
|
ep_supra_share = 0.0
|
||||||
|
ep_supra_penalty = 0.0
|
||||||
ep_agent_prob = 0.0
|
ep_agent_prob = 0.0
|
||||||
steps = 0
|
steps = 0
|
||||||
|
|
||||||
@@ -78,6 +84,15 @@ def _evaluate_env(agent: Any, env: Any, episodes: int) -> dict[str, float]:
|
|||||||
ep_coi += float(econ.get("coi_level", 0.0))
|
ep_coi += float(econ.get("coi_level", 0.0))
|
||||||
ep_coi_leakage += float(econ.get("coi_leakage", 0.0))
|
ep_coi_leakage += float(econ.get("coi_leakage", 0.0))
|
||||||
ep_volatility += float(econ.get("volatility", 0.0))
|
ep_volatility += float(econ.get("volatility", 0.0))
|
||||||
|
ep_upward_volatility += float(
|
||||||
|
info.get("upward_volatility", econ.get("upward_volatility", 0.0))
|
||||||
|
)
|
||||||
|
ep_supra_share += float(
|
||||||
|
info.get("supra_share", econ.get("supra_share", 0.0))
|
||||||
|
)
|
||||||
|
ep_supra_penalty += float(
|
||||||
|
info.get("supra_penalty", econ.get("supra_penalty", 0.0))
|
||||||
|
)
|
||||||
ep_agent_prob += float(econ.get("agent_prob", info.get("agent_prob", 0.0)))
|
ep_agent_prob += float(econ.get("agent_prob", info.get("agent_prob", 0.0)))
|
||||||
steps += 1
|
steps += 1
|
||||||
|
|
||||||
@@ -88,6 +103,9 @@ def _evaluate_env(agent: Any, env: Any, episodes: int) -> dict[str, float]:
|
|||||||
coi_levels.append(ep_coi / denom)
|
coi_levels.append(ep_coi / denom)
|
||||||
coi_leakages.append(ep_coi_leakage / denom)
|
coi_leakages.append(ep_coi_leakage / denom)
|
||||||
volatilities.append(ep_volatility / denom)
|
volatilities.append(ep_volatility / denom)
|
||||||
|
upward_volatilities.append(ep_upward_volatility / denom)
|
||||||
|
supra_shares.append(ep_supra_share / denom)
|
||||||
|
supra_penalties.append(ep_supra_penalty / denom)
|
||||||
agent_probs.append(ep_agent_prob / denom)
|
agent_probs.append(ep_agent_prob / denom)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
@@ -99,6 +117,13 @@ def _evaluate_env(agent: Any, env: Any, episodes: int) -> dict[str, float]:
|
|||||||
"eval/coi_level_mean": float(np.mean(coi_levels)) if coi_levels else 0.0,
|
"eval/coi_level_mean": float(np.mean(coi_levels)) if coi_levels else 0.0,
|
||||||
"eval/coi_leakage_mean": float(np.mean(coi_leakages)) if coi_leakages else 0.0,
|
"eval/coi_leakage_mean": float(np.mean(coi_leakages)) if coi_leakages else 0.0,
|
||||||
"eval/volatility_mean": float(np.mean(volatilities)) if volatilities else 0.0,
|
"eval/volatility_mean": float(np.mean(volatilities)) if volatilities else 0.0,
|
||||||
|
"eval/upward_volatility_mean": (
|
||||||
|
float(np.mean(upward_volatilities)) if upward_volatilities else 0.0
|
||||||
|
),
|
||||||
|
"eval/supra_share_mean": float(np.mean(supra_shares)) if supra_shares else 0.0,
|
||||||
|
"eval/supra_penalty_mean": (
|
||||||
|
float(np.mean(supra_penalties)) if supra_penalties else 0.0
|
||||||
|
),
|
||||||
"eval/agent_prob_mean": float(np.mean(agent_probs)) if agent_probs else 0.0,
|
"eval/agent_prob_mean": float(np.mean(agent_probs)) if agent_probs else 0.0,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -71,6 +71,10 @@ class EconomicMetricsWrapper(gym.Wrapper):
|
|||||||
"coi_penalty",
|
"coi_penalty",
|
||||||
"ux_penalty",
|
"ux_penalty",
|
||||||
"volatility",
|
"volatility",
|
||||||
|
"upward_volatility",
|
||||||
|
"supra_penalty",
|
||||||
|
"supra_share",
|
||||||
|
"competitive_anchor",
|
||||||
"profit",
|
"profit",
|
||||||
"cost_floor",
|
"cost_floor",
|
||||||
"reward_revenue",
|
"reward_revenue",
|
||||||
|
|||||||
53
engine/sweeps/ppo_supra_guard.yaml
Normal file
53
engine/sweeps/ppo_supra_guard.yaml
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
method: random
|
||||||
|
metric:
|
||||||
|
name: eval/supra_share_mean
|
||||||
|
goal: minimize
|
||||||
|
run_cap: 256
|
||||||
|
command:
|
||||||
|
- ${env}
|
||||||
|
- python
|
||||||
|
- -m
|
||||||
|
- engine.train
|
||||||
|
parameters:
|
||||||
|
algo:
|
||||||
|
value: ppo
|
||||||
|
seed:
|
||||||
|
values: [42, 1337, 7777]
|
||||||
|
alpha:
|
||||||
|
values: [0.1, 0.2, 0.3, 0.4, 0.6]
|
||||||
|
n_products:
|
||||||
|
values: [25, 50]
|
||||||
|
N:
|
||||||
|
value: 100
|
||||||
|
no_robust:
|
||||||
|
values: [false, true]
|
||||||
|
lambda_coi:
|
||||||
|
values: [0.05, 0.15, 0.3]
|
||||||
|
robust_radius:
|
||||||
|
values: [0.1, 0.2, 0.3]
|
||||||
|
robust_points:
|
||||||
|
value: 7
|
||||||
|
robust_rollouts:
|
||||||
|
value: 1
|
||||||
|
eta_ux:
|
||||||
|
values: [0.05, 0.15, 0.3, 0.5, 0.75]
|
||||||
|
reward_profit_weight:
|
||||||
|
value: 1.0
|
||||||
|
total_timesteps:
|
||||||
|
value: 100000
|
||||||
|
eval_episodes:
|
||||||
|
value: 10
|
||||||
|
eval_freq:
|
||||||
|
value: 1000
|
||||||
|
log_freq:
|
||||||
|
value: 100
|
||||||
|
hist_freq:
|
||||||
|
value: 500
|
||||||
|
learning_rate:
|
||||||
|
value: 0.0003
|
||||||
|
batch_size:
|
||||||
|
value: 256
|
||||||
|
n_steps:
|
||||||
|
value: 2048
|
||||||
|
device:
|
||||||
|
value: cpu
|
||||||
@@ -216,18 +216,27 @@ class PHANTOM(gym.Env):
|
|||||||
coi_penalty = self.lambda_coi * coi_leakage * info_budget
|
coi_penalty = self.lambda_coi * coi_leakage * info_budget
|
||||||
|
|
||||||
if len(self._price_history) > 0:
|
if len(self._price_history) > 0:
|
||||||
volatility = float(
|
prev_prices = np.asarray(self._price_history[-1], dtype=float)
|
||||||
np.mean(
|
rel_change = (prices - prev_prices) / np.maximum(prev_prices, 1.0)
|
||||||
np.abs(prices - self._price_history[-1])
|
volatility = float(np.mean(np.abs(rel_change)))
|
||||||
/ np.maximum(self.baseline_prices, 1.0)
|
upward_volatility = float(np.mean(np.clip(rel_change, 0.0, None)))
|
||||||
)
|
|
||||||
)
|
|
||||||
else:
|
else:
|
||||||
volatility = 0.0
|
volatility = 0.0
|
||||||
ux_penalty = self.eta_ux * info_budget * volatility
|
upward_volatility = 0.0
|
||||||
|
ux_penalty = self.eta_ux * info_budget * (volatility + 0.5 * upward_volatility)
|
||||||
|
|
||||||
|
competitive_anchor = float(
|
||||||
|
np.clip(float(self.human_params[0]) * 1.2, *self.price_bounds)
|
||||||
|
)
|
||||||
|
price_ratio = prices / max(competitive_anchor, 1.0)
|
||||||
|
supra_excess = np.clip(price_ratio - 1.0, 0.0, None)
|
||||||
|
supra_penalty = (
|
||||||
|
0.5 * self.eta_ux * info_budget * float(np.mean(np.square(supra_excess)))
|
||||||
|
)
|
||||||
|
supra_share = float(np.mean(supra_excess > 0.0))
|
||||||
|
|
||||||
reward_revenue = self.reward_profit_weight * profit
|
reward_revenue = self.reward_profit_weight * profit
|
||||||
reward = reward_revenue - coi_penalty - ux_penalty
|
reward = reward_revenue - coi_penalty - ux_penalty - supra_penalty
|
||||||
|
|
||||||
return reward, {
|
return reward, {
|
||||||
"revenue": revenue,
|
"revenue": revenue,
|
||||||
@@ -240,6 +249,10 @@ class PHANTOM(gym.Env):
|
|||||||
"coi_info_budget": info_budget,
|
"coi_info_budget": info_budget,
|
||||||
"ux_penalty": ux_penalty,
|
"ux_penalty": ux_penalty,
|
||||||
"volatility": volatility,
|
"volatility": volatility,
|
||||||
|
"upward_volatility": upward_volatility,
|
||||||
|
"supra_penalty": supra_penalty,
|
||||||
|
"supra_share": supra_share,
|
||||||
|
"competitive_anchor": competitive_anchor,
|
||||||
"reward_revenue": reward_revenue,
|
"reward_revenue": reward_revenue,
|
||||||
"reward_total": reward,
|
"reward_total": reward,
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user