mirror of
https://github.com/velocitatem/PHANTOM.git
synced 2026-05-31 16:43:36 +00:00
shock: defining new lab environment and formulation
This commit is contained in:
101
lab/outlet/objectives/penalties.py
Normal file
101
lab/outlet/objectives/penalties.py
Normal file
@@ -0,0 +1,101 @@
|
||||
"""
|
||||
Standard objective components and penalties.
|
||||
|
||||
This module provides common reward terms:
|
||||
- PnLObjective: Basic profit and loss
|
||||
- VolatilityPenalty: Penalize price volatility for UX
|
||||
- HoldingCostPenalty: Inventory holding cost
|
||||
- LostOpportunityCostPenalty: Stockout/missed fill cost
|
||||
- InventoryRiskPenalty: Quadratic inventory risk (market making)
|
||||
- SpreadCaptureReward: Bid-ask spread capture (market making)
|
||||
"""
|
||||
from __future__ import annotations
|
||||
import numpy as np
|
||||
from .base import BaseObjective
|
||||
from ..types import Quote, InstrumentSet, StepMetrics, HiddenState, Observation
|
||||
from ..math_util import inventory_penalty
|
||||
|
||||
class PnLObjective(BaseObjective):
|
||||
"""Profit and loss reward (revenue - cost)."""
|
||||
|
||||
def reward(self, quote: Quote, instruments: InstrumentSet,
|
||||
metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> float:
|
||||
return metrics.pnl
|
||||
|
||||
def breakdown(self, quote: Quote, instruments: InstrumentSet,
|
||||
metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> dict[str, float]:
|
||||
return {'pnl': metrics.pnl, 'revenue': metrics.revenue, 'cost': metrics.cost}
|
||||
|
||||
class VolatilityPenalty(BaseObjective):
|
||||
"""Penalize price volatility for user experience."""
|
||||
|
||||
def __init__(self, scale: float = 1.0):
|
||||
self.scale = scale
|
||||
|
||||
def reward(self, quote: Quote, instruments: InstrumentSet,
|
||||
metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> float:
|
||||
return -self.scale * metrics.volatility
|
||||
|
||||
def breakdown(self, quote: Quote, instruments: InstrumentSet,
|
||||
metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> dict[str, float]:
|
||||
return {'volatility_penalty': -self.scale * metrics.volatility}
|
||||
|
||||
class HoldingCostPenalty(BaseObjective):
|
||||
"""Penalty for inventory holding costs."""
|
||||
|
||||
def __init__(self, scale: float = 1.0):
|
||||
self.scale = scale
|
||||
|
||||
def reward(self, quote: Quote, instruments: InstrumentSet,
|
||||
metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> float:
|
||||
return -self.scale * metrics.position_cost
|
||||
|
||||
def breakdown(self, quote: Quote, instruments: InstrumentSet,
|
||||
metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> dict[str, float]:
|
||||
return {'holding_cost_penalty': -self.scale * metrics.position_cost}
|
||||
|
||||
class LostOpportunityCostPenalty(BaseObjective):
|
||||
"""Penalty for lost sales due to stockouts or missed fills."""
|
||||
|
||||
def __init__(self, scale: float = 1.0):
|
||||
self.scale = scale
|
||||
|
||||
def reward(self, quote: Quote, instruments: InstrumentSet,
|
||||
metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> float:
|
||||
return -self.scale * metrics.lost_opportunity
|
||||
|
||||
def breakdown(self, quote: Quote, instruments: InstrumentSet,
|
||||
metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> dict[str, float]:
|
||||
return {'lost_opportunity_penalty': -self.scale * metrics.lost_opportunity}
|
||||
|
||||
class InventoryRiskPenalty(BaseObjective):
|
||||
"""Quadratic inventory risk penalty (Avellaneda-Stoikov style).
|
||||
|
||||
Penalty = gamma * sigma^2 * q^2 / 2, where q is total position.
|
||||
Encourages market makers to keep inventory near zero.
|
||||
"""
|
||||
|
||||
def __init__(self, gamma: float = 0.1, sigma: float = 1.0):
|
||||
self.gamma = gamma
|
||||
self.sigma = sigma
|
||||
|
||||
def reward(self, quote: Quote, instruments: InstrumentSet,
|
||||
metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> float:
|
||||
if obs.position is None: return 0.0
|
||||
q = np.sum(obs.position)
|
||||
return -inventory_penalty(q, self.gamma, self.sigma)
|
||||
|
||||
def breakdown(self, quote: Quote, instruments: InstrumentSet,
|
||||
metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> dict[str, float]:
|
||||
return {'inventory_risk_penalty': self.reward(quote, instruments, metrics, hidden, obs)}
|
||||
|
||||
class SpreadCaptureReward(BaseObjective):
|
||||
"""Reward for capturing bid-ask spread in market making."""
|
||||
|
||||
def reward(self, quote: Quote, instruments: InstrumentSet,
|
||||
metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> float:
|
||||
return metrics.spread_capture
|
||||
|
||||
def breakdown(self, quote: Quote, instruments: InstrumentSet,
|
||||
metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> dict[str, float]:
|
||||
return {'spread_capture': metrics.spread_capture}
|
||||
Reference in New Issue
Block a user