mirror of
https://github.com/velocitatem/PHANTOM.git
synced 2026-05-31 16:43:36 +00:00
shock: defining new lab environment and formulation
This commit is contained in:
11
lab/outlet/objectives/__init__.py
Normal file
11
lab/outlet/objectives/__init__.py
Normal file
@@ -0,0 +1,11 @@
|
||||
from .base import BaseObjective, CompositeObjective
|
||||
from .penalties import (PnLObjective, VolatilityPenalty, HoldingCostPenalty,
|
||||
LostOpportunityCostPenalty, InventoryRiskPenalty, SpreadCaptureReward)
|
||||
from .factory import make_objective, make_composite, retail_objective, market_making_objective
|
||||
|
||||
# Names re-exported by the objectives package, grouped by the module
# they are imported from.
__all__ = [
    # .base
    'BaseObjective',
    'CompositeObjective',
    # .penalties
    'PnLObjective',
    'VolatilityPenalty',
    'HoldingCostPenalty',
    'LostOpportunityCostPenalty',
    'InventoryRiskPenalty',
    'SpreadCaptureReward',
    # .factory
    'make_objective',
    'make_composite',
    'retail_objective',
    'market_making_objective',
]
|
||||
48
lab/outlet/objectives/base.py
Normal file
48
lab/outlet/objectives/base.py
Normal file
@@ -0,0 +1,48 @@
|
||||
"""
|
||||
Base classes for reward objectives.
|
||||
|
||||
Objectives compute scalar rewards from step metrics. The CompositeObjective
|
||||
allows combining multiple objectives with weights for multi-objective optimization.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
from abc import ABC, abstractmethod
|
||||
from ..types import Quote, InstrumentSet, StepMetrics, HiddenState, Observation
|
||||
|
||||
class BaseObjective(ABC):
    """Interface implemented by every reward objective.

    Both ``reward`` and ``breakdown`` are abstract, so a subclass can only
    be instantiated once it overrides the two of them.
    """

    @abstractmethod
    def reward(self, quote: Quote, instruments: InstrumentSet,
               metrics: StepMetrics, hidden: HiddenState,
               obs: Observation) -> float:
        """Return the scalar reward for a single step."""

    @abstractmethod
    def breakdown(self, quote: Quote, instruments: InstrumentSet,
                  metrics: StepMetrics, hidden: HiddenState,
                  obs: Observation) -> dict[str, float]:
        """Return the step's reward components keyed by name."""
|
||||
|
||||
class CompositeObjective(BaseObjective):
    """Weighted sum of multiple objectives.

    Allows combining multiple reward terms (e.g., PnL - holding_cost - volatility).

    Args:
        objectives: List of (objective, weight) tuples
    """

    def __init__(self, objectives: list[tuple[BaseObjective, float]]):
        self.objectives = objectives

    def reward(self, quote: Quote, instruments: InstrumentSet,
               metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> float:
        """Return the sum of ``weight * component.reward(...)`` over all components.

        An empty composite yields ``0.0``.
        """
        # Start from 0.0 so that an empty composite returns a float, as annotated.
        return sum(
            (w * obj.reward(quote, instruments, metrics, hidden, obs)
             for obj, w in self.objectives),
            0.0,
        )

    def breakdown(self, quote: Quote, instruments: InstrumentSet,
                  metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> dict[str, float]:
        """Return every component's breakdown entries scaled by its weight.

        When several components report the same key, their weighted values
        are added together rather than the last one replacing the others.
        """
        bd: dict[str, float] = {}
        for obj, w in self.objectives:
            for k, v in obj.breakdown(quote, instruments, metrics, hidden, obs).items():
                # Accumulate: plain assignment would drop earlier components
                # that share this key.
                bd[k] = bd.get(k, 0.0) + w * v
        return bd
|
||||
82
lab/outlet/objectives/factory.py
Normal file
82
lab/outlet/objectives/factory.py
Normal file
@@ -0,0 +1,82 @@
|
||||
"""
|
||||
Factory functions for creating objectives.
|
||||
|
||||
Provides:
|
||||
- make_objective: Create single objective by name
|
||||
- make_composite: Create weighted combination of objectives
|
||||
- retail_objective: Default objective for retail pricing
|
||||
- market_making_objective: Default objective for market making
|
||||
"""
|
||||
from __future__ import annotations
|
||||
from .base import BaseObjective, CompositeObjective
|
||||
from .penalties import (PnLObjective, VolatilityPenalty, HoldingCostPenalty,
|
||||
LostOpportunityCostPenalty, InventoryRiskPenalty, SpreadCaptureReward)
|
||||
|
||||
# Maps the public objective name accepted by make_objective() to the class
# that implements it.
REGISTRY: dict[str, type[BaseObjective]] = {
    'pnl': PnLObjective,
    'volatility': VolatilityPenalty,
    'holding_cost': HoldingCostPenalty,
    'lost_opportunity': LostOpportunityCostPenalty,
    'inventory_risk': InventoryRiskPenalty,
    'spread_capture': SpreadCaptureReward,
}
|
||||
|
||||
def make_objective(name: str, **kwargs) -> BaseObjective:
    """Instantiate the objective registered under ``name``.

    Args:
        name: Key into ``REGISTRY`` (pnl, volatility, holding_cost,
            lost_opportunity, inventory_risk, spread_capture).
        **kwargs: Forwarded unchanged to the objective's constructor.

    Returns:
        A new instance of the registered objective class.

    Raises:
        ValueError: If ``name`` is not a key of ``REGISTRY``.
    """
    if name in REGISTRY:
        objective_cls = REGISTRY[name]
        return objective_cls(**kwargs)
    raise ValueError(f"Unknown objective: {name}. Available: {list(REGISTRY.keys())}")
|
||||
|
||||
def make_composite(spec: list[tuple[str, float, dict]] | dict[str, float]) -> CompositeObjective:
    """Create composite objective from specification.

    Args:
        spec: Either:
            - dict of {name: weight} for simple cases; each objective is
              built with its default constructor arguments
            - list of (name, weight, kwargs) tuples for full control; the
              kwargs element may be omitted or None, which is treated as
              "no constructor arguments"

    Returns:
        CompositeObjective with the components in the order given by spec

    Raises:
        ValueError: If a name is not registered, or a list entry does not
            have two or three elements.
    """
    if isinstance(spec, dict):
        return CompositeObjective(
            [(make_objective(name), weight) for name, weight in spec.items()]
        )

    objectives = []
    for entry in spec:
        # Star-unpacking lets both (name, weight) and (name, weight, kwargs)
        # through; fewer than two elements still raises ValueError here.
        name, weight, *extra = entry
        if len(extra) > 1:
            raise ValueError(
                f"Objective spec entries must be (name, weight) or "
                f"(name, weight, kwargs); got {len(extra) + 2} elements"
            )
        kwargs = extra[0] if extra and extra[0] is not None else {}
        objectives.append((make_objective(name, **kwargs), weight))
    return CompositeObjective(objectives)
|
||||
|
||||
def retail_objective(volatility_weight: float = 0.1, holding_weight: float = 0.5,
                     stockout_weight: float = 0.3) -> CompositeObjective:
    """Build the default composite objective for retail dynamic pricing.

    Reward = PnL - volatility_weight*volatility - holding_weight*holding_cost
             - stockout_weight*lost_opportunity

    The penalty components already return negated values, so the weights
    are passed through as positive numbers.

    Args:
        volatility_weight: Weight of the 'volatility' component.
        holding_weight: Weight of the 'holding_cost' component.
        stockout_weight: Weight of the 'lost_opportunity' component.

    Returns:
        CompositeObjective with PnL (weight 1.0) plus the three penalties.
    """
    component_weights = {
        'pnl': 1.0,
        'volatility': volatility_weight,
        'holding_cost': holding_weight,
        'lost_opportunity': stockout_weight,
    }
    return make_composite(component_weights)
|
||||
|
||||
def market_making_objective(gamma: float = 0.1, sigma: float = 1.0,
                            spread_weight: float = 0.5) -> CompositeObjective:
    """Default objective for market making.

    Reward = PnL + spread_weight*spread_capture - inventory_risk(gamma, sigma)

    The inventory-risk component already returns a negated penalty, so it
    is combined with weight 1.0.

    Args:
        gamma: Forwarded to InventoryRiskPenalty.
        sigma: Forwarded to InventoryRiskPenalty.
        spread_weight: Weight applied to the spread-capture reward. The
            default of 0.5 reproduces the previously hard-coded value.

    Returns:
        CompositeObjective of PnL, spread capture and inventory risk.
    """
    return CompositeObjective([
        (PnLObjective(), 1.0),
        (SpreadCaptureReward(), spread_weight),
        (InventoryRiskPenalty(gamma=gamma, sigma=sigma), 1.0),
    ])
|
||||
101
lab/outlet/objectives/penalties.py
Normal file
101
lab/outlet/objectives/penalties.py
Normal file
@@ -0,0 +1,101 @@
|
||||
"""
|
||||
Standard objective components and penalties.
|
||||
|
||||
This module provides common reward terms:
|
||||
- PnLObjective: Basic profit and loss
|
||||
- VolatilityPenalty: Penalize price volatility for UX
|
||||
- HoldingCostPenalty: Inventory holding cost
|
||||
- LostOpportunityCostPenalty: Stockout/missed fill cost
|
||||
- InventoryRiskPenalty: Quadratic inventory risk (market making)
|
||||
- SpreadCaptureReward: Bid-ask spread capture (market making)
|
||||
"""
|
||||
from __future__ import annotations
|
||||
import numpy as np
|
||||
from .base import BaseObjective
|
||||
from ..types import Quote, InstrumentSet, StepMetrics, HiddenState, Observation
|
||||
from ..math_util import inventory_penalty
|
||||
|
||||
class PnLObjective(BaseObjective):
    """Reward equal to the step's ``metrics.pnl``."""

    def breakdown(self, quote: Quote, instruments: InstrumentSet,
                  metrics: StepMetrics, hidden: HiddenState,
                  obs: Observation) -> dict[str, float]:
        """Report pnl together with the revenue and cost fields of the metrics."""
        components = {
            'pnl': metrics.pnl,
            'revenue': metrics.revenue,
            'cost': metrics.cost,
        }
        return components

    def reward(self, quote: Quote, instruments: InstrumentSet,
               metrics: StepMetrics, hidden: HiddenState,
               obs: Observation) -> float:
        """Return ``metrics.pnl`` unchanged."""
        step_pnl = metrics.pnl
        return step_pnl
|
||||
|
||||
class VolatilityPenalty(BaseObjective):
    """Negative reward proportional to ``metrics.volatility``.

    Args:
        scale: Multiplier applied to the volatility before negation.
    """

    def __init__(self, scale: float = 1.0):
        self.scale = scale

    def reward(self, quote: Quote, instruments: InstrumentSet,
               metrics: StepMetrics, hidden: HiddenState,
               obs: Observation) -> float:
        """Return ``-scale * metrics.volatility``."""
        penalty = -self.scale * metrics.volatility
        return penalty

    def breakdown(self, quote: Quote, instruments: InstrumentSet,
                  metrics: StepMetrics, hidden: HiddenState,
                  obs: Observation) -> dict[str, float]:
        """Return the same value as ``reward`` under 'volatility_penalty'."""
        penalty = -self.scale * metrics.volatility
        return {'volatility_penalty': penalty}
|
||||
|
||||
class HoldingCostPenalty(BaseObjective):
    """Negative reward proportional to ``metrics.position_cost``.

    Args:
        scale: Multiplier applied to the position cost before negation.
    """

    def __init__(self, scale: float = 1.0):
        self.scale = scale

    def reward(self, quote: Quote, instruments: InstrumentSet,
               metrics: StepMetrics, hidden: HiddenState,
               obs: Observation) -> float:
        """Return ``-scale * metrics.position_cost``."""
        penalty = -self.scale * metrics.position_cost
        return penalty

    def breakdown(self, quote: Quote, instruments: InstrumentSet,
                  metrics: StepMetrics, hidden: HiddenState,
                  obs: Observation) -> dict[str, float]:
        """Return the same value as ``reward`` under 'holding_cost_penalty'."""
        penalty = -self.scale * metrics.position_cost
        return {'holding_cost_penalty': penalty}
|
||||
|
||||
class LostOpportunityCostPenalty(BaseObjective):
    """Negative reward proportional to ``metrics.lost_opportunity``.

    Args:
        scale: Multiplier applied to the lost-opportunity metric before negation.
    """

    def __init__(self, scale: float = 1.0):
        self.scale = scale

    def reward(self, quote: Quote, instruments: InstrumentSet,
               metrics: StepMetrics, hidden: HiddenState,
               obs: Observation) -> float:
        """Return ``-scale * metrics.lost_opportunity``."""
        penalty = -self.scale * metrics.lost_opportunity
        return penalty

    def breakdown(self, quote: Quote, instruments: InstrumentSet,
                  metrics: StepMetrics, hidden: HiddenState,
                  obs: Observation) -> dict[str, float]:
        """Return the same value as ``reward`` under 'lost_opportunity_penalty'."""
        penalty = -self.scale * metrics.lost_opportunity
        return {'lost_opportunity_penalty': penalty}
|
||||
|
||||
class InventoryRiskPenalty(BaseObjective):
    """Inventory risk penalty computed from the summed position.

    The reward is the negated result of
    ``inventory_penalty(q, gamma, sigma)``, where ``q`` is the sum of
    ``obs.position``.
    NOTE(review): the original documentation describes the helper as the
    Avellaneda-Stoikov style quadratic ``gamma * sigma^2 * q^2 / 2``;
    ``inventory_penalty`` is defined in another module, so confirm there.

    Args:
        gamma: Passed to ``inventory_penalty`` as its second argument.
        sigma: Passed to ``inventory_penalty`` as its third argument.
    """

    def __init__(self, gamma: float = 0.1, sigma: float = 1.0):
        self.gamma = gamma
        self.sigma = sigma

    def reward(self, quote: Quote, instruments: InstrumentSet,
               metrics: StepMetrics, hidden: HiddenState,
               obs: Observation) -> float:
        """Return the negated inventory penalty, or 0.0 when no position is observed."""
        if obs.position is None:
            return 0.0
        net_position = np.sum(obs.position)
        return -inventory_penalty(net_position, self.gamma, self.sigma)

    def breakdown(self, quote: Quote, instruments: InstrumentSet,
                  metrics: StepMetrics, hidden: HiddenState,
                  obs: Observation) -> dict[str, float]:
        """Return the value of ``reward`` under 'inventory_risk_penalty'."""
        penalty = self.reward(quote, instruments, metrics, hidden, obs)
        return {'inventory_risk_penalty': penalty}
|
||||
|
||||
class SpreadCaptureReward(BaseObjective):
    """Reward equal to the step's ``metrics.spread_capture``."""

    def breakdown(self, quote: Quote, instruments: InstrumentSet,
                  metrics: StepMetrics, hidden: HiddenState,
                  obs: Observation) -> dict[str, float]:
        """Return ``metrics.spread_capture`` under 'spread_capture'."""
        captured = metrics.spread_capture
        return {'spread_capture': captured}

    def reward(self, quote: Quote, instruments: InstrumentSet,
               metrics: StepMetrics, hidden: HiddenState,
               obs: Observation) -> float:
        """Return ``metrics.spread_capture`` unchanged."""
        captured = metrics.spread_capture
        return captured
|
||||
Reference in New Issue
Block a user