shock: defining new lab environment and formulation

This commit is contained in:
2026-01-23 10:37:32 +01:00
parent a033e77697
commit 4e2e41d943
41 changed files with 4175 additions and 0 deletions

View File

@@ -0,0 +1,11 @@
from .base import BaseObjective, CompositeObjective
from .penalties import (PnLObjective, VolatilityPenalty, HoldingCostPenalty,
LostOpportunityCostPenalty, InventoryRiskPenalty, SpreadCaptureReward)
from .factory import make_objective, make_composite, retail_objective, market_making_objective
# Names re-exported from .base, .penalties and .factory; this list is what
# `from <package> import *` exposes.
__all__ = [
'BaseObjective', 'CompositeObjective',
'PnLObjective', 'VolatilityPenalty', 'HoldingCostPenalty',
'LostOpportunityCostPenalty', 'InventoryRiskPenalty', 'SpreadCaptureReward',
'make_objective', 'make_composite', 'retail_objective', 'market_making_objective',
]

View File

@@ -0,0 +1,48 @@
"""
Base classes for reward objectives.
Objectives compute scalar rewards from step metrics. The CompositeObjective
allows combining multiple objectives with weights for multi-objective optimization.
"""
from __future__ import annotations
from abc import ABC, abstractmethod
from ..types import Quote, InstrumentSet, StepMetrics, HiddenState, Observation
class BaseObjective(ABC):
    """Interface implemented by every reward objective.

    An objective maps the data describing one step to a scalar reward and to
    a named breakdown of that reward. Both ``reward`` and ``breakdown`` are
    abstract, so a subclass must override the two of them before it can be
    instantiated.
    """

    @abstractmethod
    def reward(
        self,
        quote: Quote,
        instruments: InstrumentSet,
        metrics: StepMetrics,
        hidden: HiddenState,
        obs: Observation,
    ) -> float:
        """Return the scalar reward for one step."""

    @abstractmethod
    def breakdown(
        self,
        quote: Quote,
        instruments: InstrumentSet,
        metrics: StepMetrics,
        hidden: HiddenState,
        obs: Observation,
    ) -> dict[str, float]:
        """Return the named components of the reward for one step."""
class CompositeObjective(BaseObjective):
    """Weighted sum of multiple objectives.

    Allows combining multiple reward terms (e.g., PnL - holding_cost - volatility).

    Args:
        objectives: (objective, weight) pairs. Any iterable is accepted; it
            is copied into a list so it can be traversed on every call.
    """

    def __init__(self, objectives: list[tuple[BaseObjective, float]]):
        # Materialise once: a generator would be exhausted after the first
        # reward()/breakdown() call, and a private copy keeps the composite
        # independent of later changes to the caller's list.
        self.objectives = list(objectives)

    def reward(self, quote: Quote, instruments: InstrumentSet,
               metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> float:
        """Return the sum of ``weight * objective.reward(...)`` over all components.

        An empty composite yields ``0.0``.
        """
        return sum(
            (w * obj.reward(quote, instruments, metrics, hidden, obs)
             for obj, w in self.objectives),
            0.0,  # float start value so the empty case matches the annotation
        )

    def breakdown(self, quote: Quote, instruments: InstrumentSet,
                  metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> dict[str, float]:
        """Return the weighted breakdown entries of all components, merged by key.

        Each entry reported by a component is multiplied by that component's
        weight. When several components report the same key, their weighted
        values are added together.
        """
        bd: dict[str, float] = {}
        for obj, w in self.objectives:
            for k, v in obj.breakdown(quote, instruments, metrics, hidden, obs).items():
                term = w * v
                # Accumulate rather than assign: plain assignment would drop
                # the contribution of an earlier component using the same key.
                bd[k] = bd[k] + term if k in bd else term
        return bd

View File

@@ -0,0 +1,82 @@
"""
Factory functions for creating objectives.
Provides:
- make_objective: Create single objective by name
- make_composite: Create weighted combination of objectives
- retail_objective: Default objective for retail pricing
- market_making_objective: Default objective for market making
"""
from __future__ import annotations
from .base import BaseObjective, CompositeObjective
from .penalties import (PnLObjective, VolatilityPenalty, HoldingCostPenalty,
LostOpportunityCostPenalty, InventoryRiskPenalty, SpreadCaptureReward)
# Lookup table from the short objective names accepted by make_objective()
# to the classes that implement them.
REGISTRY: dict[str, type[BaseObjective]] = {
'pnl': PnLObjective,
'volatility': VolatilityPenalty,
'holding_cost': HoldingCostPenalty,
'lost_opportunity': LostOpportunityCostPenalty,
'inventory_risk': InventoryRiskPenalty,
'spread_capture': SpreadCaptureReward,
}
def make_objective(name: str, **kwargs) -> BaseObjective:
    """Instantiate the objective registered under ``name``.

    Args:
        name: Key into REGISTRY (pnl, volatility, holding_cost,
            lost_opportunity, inventory_risk, spread_capture)
        **kwargs: Forwarded unchanged to the objective's constructor

    Returns:
        A new instance of the registered objective class

    Raises:
        ValueError: If ``name`` is not a key of REGISTRY
    """
    if name in REGISTRY:
        objective_cls = REGISTRY[name]
        return objective_cls(**kwargs)
    raise ValueError(f"Unknown objective: {name}. Available: {list(REGISTRY.keys())}")
def make_composite(spec: list[tuple[str, float, dict]] | dict[str, float]) -> CompositeObjective:
    """Build a CompositeObjective from a declarative specification.

    Args:
        spec: One of:
            - a dict of {name: weight}; each objective is created with its
              default constructor arguments
            - a list of (name, weight, kwargs) tuples; ``kwargs`` is passed
              to the constructor of the named objective

    Returns:
        CompositeObjective holding one (objective, weight) pair per entry,
        in the order the entries appear in ``spec``
    """
    if isinstance(spec, dict):
        pairs = [(make_objective(key), w) for key, w in spec.items()]
    else:
        pairs = [(make_objective(key, **params), w) for key, w, params in spec]
    return CompositeObjective(pairs)
def retail_objective(volatility_weight: float = 0.1, holding_weight: float = 0.5,
                     stockout_weight: float = 0.3) -> CompositeObjective:
    """Build the default composite objective for retail dynamic pricing.

    Reward = PnL - volatility_weight*volatility - holding_weight*holding_cost
             - stockout_weight*lost_opportunity

    Args:
        volatility_weight: Weight applied to the 'volatility' objective
        holding_weight: Weight applied to the 'holding_cost' objective
        stockout_weight: Weight applied to the 'lost_opportunity' objective

    Returns:
        CompositeObjective with 'pnl' at weight 1.0 plus the three weighted terms
    """
    weights = {
        'pnl': 1.0,
        'volatility': volatility_weight,
        'holding_cost': holding_weight,
        'lost_opportunity': stockout_weight,
    }
    return make_composite(weights)
def market_making_objective(gamma: float = 0.1, sigma: float = 1.0) -> CompositeObjective:
    """Build the default composite objective for market making.

    Reward = PnL + 0.5*spread_capture - inventory_risk(gamma, sigma)

    Args:
        gamma: Passed to InventoryRiskPenalty
        sigma: Passed to InventoryRiskPenalty

    Returns:
        CompositeObjective of PnL (weight 1.0), spread capture (weight 0.5)
        and the inventory risk penalty (weight 1.0)
    """
    inventory_term = InventoryRiskPenalty(gamma=gamma, sigma=sigma)
    components = [
        (PnLObjective(), 1.0),
        (SpreadCaptureReward(), 0.5),
        (inventory_term, 1.0),
    ]
    return CompositeObjective(components)

View File

@@ -0,0 +1,101 @@
"""
Standard objective components and penalties.
This module provides common reward terms:
- PnLObjective: Basic profit and loss
- VolatilityPenalty: Penalize price volatility for UX
- HoldingCostPenalty: Inventory holding cost
- LostOpportunityCostPenalty: Stockout/missed fill cost
- InventoryRiskPenalty: Quadratic inventory risk (market making)
- SpreadCaptureReward: Bid-ask spread capture (market making)
"""
from __future__ import annotations
import numpy as np
from .base import BaseObjective
from ..types import Quote, InstrumentSet, StepMetrics, HiddenState, Observation
from ..math_util import inventory_penalty
class PnLObjective(BaseObjective):
    """Reward equal to the step's profit and loss."""

    def reward(
        self,
        quote: Quote,
        instruments: InstrumentSet,
        metrics: StepMetrics,
        hidden: HiddenState,
        obs: Observation,
    ) -> float:
        """Return ``metrics.pnl`` unchanged."""
        return metrics.pnl

    def breakdown(
        self,
        quote: Quote,
        instruments: InstrumentSet,
        metrics: StepMetrics,
        hidden: HiddenState,
        obs: Observation,
    ) -> dict[str, float]:
        """Return the PnL together with the revenue and cost read from ``metrics``."""
        components = {}
        components['pnl'] = metrics.pnl
        components['revenue'] = metrics.revenue
        components['cost'] = metrics.cost
        return components
class VolatilityPenalty(BaseObjective):
    """Negative reward proportional to ``metrics.volatility``.

    Args:
        scale: Multiplier applied to the volatility before negation
    """

    def __init__(self, scale: float = 1.0):
        self.scale = scale

    def reward(
        self,
        quote: Quote,
        instruments: InstrumentSet,
        metrics: StepMetrics,
        hidden: HiddenState,
        obs: Observation,
    ) -> float:
        """Return ``-scale * metrics.volatility``."""
        penalty = -self.scale * metrics.volatility
        return penalty

    def breakdown(
        self,
        quote: Quote,
        instruments: InstrumentSet,
        metrics: StepMetrics,
        hidden: HiddenState,
        obs: Observation,
    ) -> dict[str, float]:
        """Return the same value as ``reward`` under the key 'volatility_penalty'."""
        penalty = -self.scale * metrics.volatility
        return {'volatility_penalty': penalty}
class HoldingCostPenalty(BaseObjective):
    """Negative reward proportional to ``metrics.position_cost``.

    Args:
        scale: Multiplier applied to the position cost before negation
    """

    def __init__(self, scale: float = 1.0):
        self.scale = scale

    def reward(
        self,
        quote: Quote,
        instruments: InstrumentSet,
        metrics: StepMetrics,
        hidden: HiddenState,
        obs: Observation,
    ) -> float:
        """Return ``-scale * metrics.position_cost``."""
        penalty = -self.scale * metrics.position_cost
        return penalty

    def breakdown(
        self,
        quote: Quote,
        instruments: InstrumentSet,
        metrics: StepMetrics,
        hidden: HiddenState,
        obs: Observation,
    ) -> dict[str, float]:
        """Return the same value as ``reward`` under the key 'holding_cost_penalty'."""
        penalty = -self.scale * metrics.position_cost
        return {'holding_cost_penalty': penalty}
class LostOpportunityCostPenalty(BaseObjective):
    """Negative reward proportional to ``metrics.lost_opportunity``.

    Intended to penalise lost sales due to stockouts or missed fills.

    Args:
        scale: Multiplier applied to the lost-opportunity metric before negation
    """

    def __init__(self, scale: float = 1.0):
        self.scale = scale

    def reward(
        self,
        quote: Quote,
        instruments: InstrumentSet,
        metrics: StepMetrics,
        hidden: HiddenState,
        obs: Observation,
    ) -> float:
        """Return ``-scale * metrics.lost_opportunity``."""
        penalty = -self.scale * metrics.lost_opportunity
        return penalty

    def breakdown(
        self,
        quote: Quote,
        instruments: InstrumentSet,
        metrics: StepMetrics,
        hidden: HiddenState,
        obs: Observation,
    ) -> dict[str, float]:
        """Return the same value as ``reward`` under the key 'lost_opportunity_penalty'."""
        penalty = -self.scale * metrics.lost_opportunity
        return {'lost_opportunity_penalty': penalty}
class InventoryRiskPenalty(BaseObjective):
    """Inventory risk penalty on the total position (Avellaneda-Stoikov style).

    The penalty is computed by ``math_util.inventory_penalty(q, gamma, sigma)``
    where ``q`` is the sum of ``obs.position``. The original description gives
    the formula as gamma * sigma^2 * q^2 / 2; NOTE(review): confirm against
    ``inventory_penalty``, which is defined outside this module.
    Encourages market makers to keep inventory near zero.

    Args:
        gamma: Forwarded to ``inventory_penalty``
        sigma: Forwarded to ``inventory_penalty``
    """

    def __init__(self, gamma: float = 0.1, sigma: float = 1.0):
        self.gamma = gamma
        self.sigma = sigma

    def reward(
        self,
        quote: Quote,
        instruments: InstrumentSet,
        metrics: StepMetrics,
        hidden: HiddenState,
        obs: Observation,
    ) -> float:
        """Return the negated inventory penalty, or 0.0 when no position is observed."""
        if obs.position is None:
            return 0.0
        total_position = np.sum(obs.position)
        penalty = inventory_penalty(total_position, self.gamma, self.sigma)
        return -penalty

    def breakdown(
        self,
        quote: Quote,
        instruments: InstrumentSet,
        metrics: StepMetrics,
        hidden: HiddenState,
        obs: Observation,
    ) -> dict[str, float]:
        """Return the value of ``reward`` under the key 'inventory_risk_penalty'."""
        value = self.reward(quote, instruments, metrics, hidden, obs)
        return {'inventory_risk_penalty': value}
class SpreadCaptureReward(BaseObjective):
    """Reward equal to the spread capture reported in the step metrics (market making)."""

    def reward(
        self,
        quote: Quote,
        instruments: InstrumentSet,
        metrics: StepMetrics,
        hidden: HiddenState,
        obs: Observation,
    ) -> float:
        """Return ``metrics.spread_capture`` unchanged."""
        captured = metrics.spread_capture
        return captured

    def breakdown(
        self,
        quote: Quote,
        instruments: InstrumentSet,
        metrics: StepMetrics,
        hidden: HiddenState,
        obs: Observation,
    ) -> dict[str, float]:
        """Return ``metrics.spread_capture`` under the key 'spread_capture'."""
        captured = metrics.spread_capture
        return {'spread_capture': captured}