shock: defining new lab environment and formulation

2026-05-31 16:43:36 +00:00 · 2026-01-23 10:37:32 +01:00
parent a033e77697
commit 4e2e41d943
41 changed files with 4175 additions and 0 deletions
--- a/lab/outlet/objectives/base.py
+++ b/lab/outlet/objectives/base.py
@@ -0,0 +1,48 @@
+"""
+Base classes for reward objectives.
+
+Objectives compute scalar rewards from step metrics. The CompositeObjective
+allows combining multiple objectives with weights for multi-objective optimization.
+"""
+from __future__ import annotations
+from abc import ABC, abstractmethod
+from ..types import Quote, InstrumentSet, StepMetrics, HiddenState, Observation
+
+class BaseObjective(ABC):
+    """Interface that every reward objective implements.
+
+    Concrete subclasses have to override both reward() and breakdown();
+    the two methods receive the same five arguments.
+    """
+
+    @abstractmethod
+    def reward(
+        self,
+        quote: Quote,
+        instruments: InstrumentSet,
+        metrics: StepMetrics,
+        hidden: HiddenState,
+        obs: Observation,
+    ) -> float:
+        """Return the scalar reward for one step."""
+
+    @abstractmethod
+    def breakdown(
+        self,
+        quote: Quote,
+        instruments: InstrumentSet,
+        metrics: StepMetrics,
+        hidden: HiddenState,
+        obs: Observation,
+    ) -> dict[str, float]:
+        """Return the reward split into named float components."""
+
+class CompositeObjective(BaseObjective):
+    """Weighted sum of multiple objectives.
+
+    Allows combining multiple reward terms (e.g., PnL - holding_cost - volatility).
+
+    Args:
+        objectives: List of (objective, weight) tuples. The list is stored
+            by reference, not copied.
+    """
+
+    def __init__(self, objectives: list[tuple[BaseObjective, float]]):
+        self.objectives = objectives
+
+    def reward(self, quote: Quote, instruments: InstrumentSet,
+               metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> float:
+        """Return the weighted sum of every child objective's reward.
+
+        All arguments are forwarded unchanged to each child's ``reward()``.
+        With no children the result is ``0`` (the empty sum).
+        """
+        return sum(w * obj.reward(quote, instruments, metrics, hidden, obs)
+                   for obj, w in self.objectives)
+
+    def breakdown(self, quote: Quote, instruments: InstrumentSet,
+                  metrics: StepMetrics, hidden: HiddenState, obs: Observation) -> dict[str, float]:
+        """Return the weighted per-key breakdown merged across all children.
+
+        Each value from a child's ``breakdown()`` is scaled by that child's
+        weight. When several children report the same key, their weighted
+        values are added together instead of the later child overwriting
+        the earlier one.
+        """
+        bd: dict[str, float] = {}
+        for obj, w in self.objectives:
+            for k, v in obj.breakdown(quote, instruments, metrics, hidden, obs).items():
+                contribution = w * v
+                # Accumulate so a key shared by several children is not lost;
+                # a key seen for the first time keeps its plain weighted value.
+                bd[k] = bd[k] + contribution if k in bd else contribution
+        return bd