# Provenance: extracted from git patch fa65fe992d93f064b2c1332272387c498e5f5518
#   From: Daniel Rosel
#   Date: Sun, 14 Dec 2025 17:30:01 +0100
#   Subject: [PATCH] initial environemnt definitions
#   File: sim/rl/environment.py (new file)
"""Initial Gymnasium environment definitions for price optimisation.

Terminology: the "learner" is the RL agent that learns to optimise pricing.
An "agent" is part of the environment: it generates the demand that the
learner has to process.
"""

from dataclasses import dataclass

import gymnasium as gym
import numpy as np
from gymnasium import spaces


@dataclass
class BusinessLogicConstraints:
    """Business limits that bound what the learner may do to prices."""

    # Maximum relative price change per step (0.3 means +/-30 %).
    max_price_adjustment: float = 0.3
    # Maximum price allowed in the system.
    system_max_price: float = 500.0
    # Number of products in the catalogue. The misspelt field name is kept
    # on purpose: it is part of the public interface.
    product_catelogue_size: int = 100


class PHANTOMEnv(gym.Env):
    """Single-price environment with a linear demand model.

    Each step the learner scales the current price by ``1 + action`` and is
    rewarded with the resulting revenue (``price * demand``).
    """

    def __init__(self):
        super().__init__()
        self.constraints = BusinessLogicConstraints()

        # The learner may move the price by at most +/- max_price_adjustment.
        limit = self.constraints.max_price_adjustment
        self.action_space = spaces.Box(
            low=-limit, high=limit, shape=(1,), dtype=np.float32
        )

        # NOTE(review): reset()/step() currently return a flat
        # {'price', 'demand'} dict of scalars, which does not match the
        # nested per-product space declared here. TODO: reconcile the two
        # once the multi-product state is implemented.
        catalogue_shape = (self.constraints.product_catelogue_size,)
        self.observation_space = spaces.Dict({
            'elasticity': spaces.Dict({
                'price': spaces.Box(
                    low=0,
                    high=self.constraints.system_max_price,
                    shape=catalogue_shape,
                    dtype=np.float32,
                ),
                'demand': spaces.Box(
                    low=0, high=np.inf, shape=catalogue_shape, dtype=np.float32
                ),
            })
        })

    def reset(self, seed=None, options=None):
        """Start a new episode at the base price with zero demand.

        Args:
            seed: Forwarded to ``gym.Env.reset`` to seed the environment RNG.
            options: Accepted for API compatibility; currently unused.

        Returns:
            An ``(observation, info)`` tuple. The observation is a copy of
            the internal state, so later steps do not mutate it.
        """
        super().reset(seed=seed)
        self.state = {
            'price': 100.0,  # base price
            'demand': 0.0,
        }
        return dict(self.state), {}

    def step(self, action):
        """Apply a relative price adjustment and simulate the demand.

        Args:
            action: Relative price change. Either a scalar or a sequence /
                array whose first element is used. Values outside
                ``+/- max_price_adjustment`` are clipped.

        Returns:
            ``(observation, reward, terminated, truncated, info)`` where the
            reward is the revenue ``price * demand`` and ``truncated`` is
            always ``False``.
        """
        limit = self.constraints.max_price_adjustment
        # Box bounds are not enforced on the input, so clip here to make
        # sure the business constraint always holds.
        raw_adjustment = np.asarray(action, dtype=np.float64).reshape(-1)[0]
        price_adjustment = float(np.clip(raw_adjustment, -limit, limit))

        # Keep the price inside [0, system_max_price]; without the clip a
        # single +30 % step can overshoot the declared maximum.
        new_price = float(np.clip(
            self.state['price'] * (1 + price_adjustment),
            0.0,
            self.constraints.system_max_price,
        ))
        self.state['price'] = new_price

        # Simulate demand based on the new price.
        demand = self.simulate_demand(new_price)
        self.state['demand'] = demand

        # Reward is the revenue made at the new price.
        reward = new_price * demand

        # The episode ends once nothing can be sold any more.
        done = self.state['price'] <= 0.0 or self.state['demand'] <= 0.0

        # Hand out a copy so stored observations are not mutated later.
        return dict(self.state), reward, done, False, {}

    def simulate_demand(self, price, base_demand=200, price_sensitivity=0.5):
        """Return the demand for ``price`` under a linear demand model.

        Demand decreases linearly as the price increases and never drops
        below zero.

        Args:
            price: Price to evaluate.
            base_demand: Demand at a price of zero.
            price_sensitivity: Demand lost per unit of price.

        Returns:
            The non-negative demand as a float.
        """
        return max(0.0, float(base_demand - price_sensitivity * price))


if __name__ == "__main__":
    env = PHANTOMEnv()
    obs, _ = env.reset()
    total_reward = 0.0

    # Multiplicative updates never bring the price down to exactly zero, so
    # an episode only ends when demand hits zero. Cap the random rollout so
    # the demo always finishes quickly.
    max_steps = 1_000
    for _ in range(max_steps):
        action = env.action_space.sample()  # Random action
        obs, reward, terminated, truncated, _ = env.step(action)
        total_reward += reward
        print(f"Price: {obs['price']:.2f}, Demand: {obs['demand']:.2f}, Reward: {reward:.2f}")
        if terminated or truncated:
            break

    print(f"Total Reward: {total_reward:.2f}")