Files
PHANTOM/lab/outlet/gym_wrapper.py

87 lines
3.1 KiB
Python

"""
Gymnasium-compatible wrapper for the Quote-Control platform.
Provides a standard Gym interface for RL training:
- observation_space: Box space with flattened observation
- action_space: Box space with price multipliers [0.5, 2.0]
- reset(), step(), render(), close() methods
Example:
>>> from lab.config import make_retail_platform
>>> from lab.outlet.gym_wrapper import QuoteGymEnv
>>> env = QuoteGymEnv(make_retail_platform())
>>> obs, info = env.reset()
>>> obs, reward, done, truncated, info = env.step(env.action_space.sample())
"""
from __future__ import annotations
from typing import Any
import numpy as np
try:
import gymnasium as gym
from gymnasium import spaces
HAS_GYM = True
except ImportError:
HAS_GYM = False
from .platform import Platform, PlatformConfig
from .types import Quote, InstrumentSet, StepResult
class QuoteGymEnv:
    """Gymnasium-compatible environment wrapper.

    Wraps a Platform instance with standard Gym interface.
    Actions are price multipliers in [0.5, 2.0] applied to reference prices.
    Observations are flattened numpy arrays containing quotes, fills, exposures.

    NOTE(review): the class exposes the Gymnasium method names but does not
    subclass ``gymnasium.Env``; confirm that no downstream tooling relies on
    an ``isinstance(env, gymnasium.Env)`` check.
    """

    def __init__(self, platform: Platform):
        """Build the action and observation spaces for ``platform``.

        Args:
            platform: Platform to wrap. ``platform.instruments.n`` fixes the
                number of action components, and a truthy ``platform.market``
                adds ``n`` extra observation entries.

        Raises:
            ImportError: If gymnasium could not be imported.
        """
        if not HAS_GYM:
            raise ImportError("gymnasium required for QuoteGymEnv")
        self.platform = platform
        self.n = platform.instruments.n
        self._last_result: StepResult | None = None
        # action space: price adjustments as multipliers [0.5, 2.0]
        self.action_space = spaces.Box(low=0.5, high=2.0, shape=(self.n,), dtype=np.float32)
        # observation space
        obs_dim = self.n * 4  # quotes + fills + exposures + position
        if platform.market:
            obs_dim += self.n  # competitor quotes
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf,
                                            shape=(obs_dim,), dtype=np.float32)

    @staticmethod
    def _flat_obs(result: StepResult) -> np.ndarray:
        """Return ``result.obs`` flattened and cast to float32."""
        return result.obs.to_flat().astype(np.float32)

    def reset(self, seed: int | None = None, options: dict | None = None) -> tuple[np.ndarray, dict]:
        """Reset the wrapped platform and return ``(observation, info)``.

        Args:
            seed: Forwarded unchanged to ``platform.reset``.
            options: Accepted for Gymnasium signature compatibility; unused.
        """
        result = self.platform.reset(seed)
        self._last_result = result
        return self._flat_obs(result), result.info

    def step(self, action: np.ndarray) -> tuple[np.ndarray, float, bool, bool, dict]:
        """Apply price multipliers and advance the platform by one step.

        Args:
            action: Multipliers applied to ``platform.instruments.refs``.
                Either a scalar / length-1 array (same multiplier for every
                instrument) or a 1-D array with ``n`` entries.

        Returns:
            ``(observation, reward, terminated, truncated, info)`` taken from
            the platform's step result, with the observation flattened to
            float32.

        Raises:
            ValueError: If ``action`` is multi-dimensional or its length is
                neither 1 nor ``n``.
        """
        multipliers = np.asarray(action)
        # Reject shapes that would otherwise broadcast silently into a price
        # matrix (e.g. a batched (1, n) or column (n, 1) action).
        if multipliers.ndim > 1 or multipliers.size not in (1, self.n):
            raise ValueError(
                f"action must be a scalar or have shape ({self.n},), "
                f"got shape {multipliers.shape}"
            )
        # convert action (multipliers) to absolute prices
        refs = self.platform.instruments.refs
        prices = refs * multipliers
        result = self.platform.step(prices)
        self._last_result = result
        return (self._flat_obs(result), result.reward,
                result.terminated, result.truncated, result.info)

    def render(self) -> None:
        """Print a one-line summary of the most recent result, if any."""
        result = self._last_result
        if result is None:
            return
        # A result may carry no metrics (presumably the one produced by
        # reset() - TODO confirm); there is nothing to print in that case.
        m = getattr(result, "metrics", None)
        if m is None:
            return
        print(f"t={self.platform._t} pnl={m.pnl:.2f} units={m.units_traded:.0f} "
              f"conv={m.conversion:.3f} vol={m.volatility:.3f}")

    def close(self) -> None:
        """No-op; present to complete the Gym interface."""
        pass
def make_env(platform: Platform) -> QuoteGymEnv:
    """Construct a QuoteGymEnv around ``platform`` and return it."""
    env = QuoteGymEnv(platform)
    return env
if HAS_GYM:
    # Register the environment id only when gymnasium is importable.
    try:
        # Build the entry point from this module's actual import name so the
        # registered path always matches however the package was imported
        # (e.g. ``lab.outlet.gym_wrapper``) instead of a hard-coded path.
        # NOTE: QuoteGymEnv requires a ``platform`` argument, so callers of
        # ``gym.make`` have to supply it as a keyword argument.
        gym.register(id='QuoteControl-v0', entry_point=f"{__name__}:QuoteGymEnv")
    except Exception:
        # Registration is best-effort: importing this module must not fail
        # because of it, but the failure is logged rather than hidden.
        import logging

        logging.getLogger(__name__).warning(
            "could not register QuoteControl-v0 with gymnasium", exc_info=True
        )