high level definition

2026-07-15 17:43:36 +00:00 · 2025-12-14 17:53:48 +01:00
parent 20132c084c
commit 7d09232e48
1 changed files with 83 additions and 11 deletions
--- a/sim/rl/environment.py
+++ b/sim/rl/environment.py
@@ -2,6 +2,7 @@ import gymnasium as gym
 from gymnasium import spaces
 import numpy as np
 from dataclasses import dataclass
 import pandas as pd
 # Here, "learner" means the agent that is learning to optimize the pricing, while "agent" is part of the environment: agents create the demand that the "learner" processes.
@@ -9,17 +10,89 @@ from dataclasses import dataclass
 class BusinessLogicConstraints():
    """Business limits that bound the learner's price actions and size the catalogue."""
    max_price_adjustment : float = 0.3 # maximum price adjustment per action; used as the symmetric -/+ bound of the action space
    system_max_price : float = 500.0 # maximum price allowed in the system (upper clipping bound for prices)
    system_min_price : float = 1.0 # minimum price allowed in the system (lower clipping bound for prices)
    product_catelogue_size : int = 100 # number of products in the catalogue (one price per product)
 class CommercePlatform:
    """Toy commerce platform that maps prices to simulated demand.

    Demand is the sum of a "human" component and an "agent" component, each
    following a constant-elasticity curve relative to ``max_price``. Every call
    to :meth:`run_pricing_simulation` appends one record to
    ``simulation_history``.
    """

    def __init__(self, product_catelogue_size: int, max_price: float, min_price: float):
        """Store the catalogue size and price bounds and start an empty history.

        Args:
            product_catelogue_size: Number of products in the catalogue.
            max_price: Highest allowed price; also the reference price of the
                demand curves.
            min_price: Lowest allowed price.
        """
        self.product_catelogue_size = product_catelogue_size
        self.max_price = max_price
        self.min_price = min_price
        self.simulation_history = []

    def setup_true_demand(self, prices: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
        """Compute observed demand and the part of it generated by agents.

        Both components use ``base * (price / max_price) ** elasticity`` with a
        negative elasticity, so prices are expected to be strictly positive
        (a zero price would produce infinite demand).

        Args:
            prices: Per-product prices.

        Returns:
            ``(observed_demand, agent_demand)`` where ``observed_demand`` is
            human demand plus agent demand.
        """
        relative_price = prices / self.max_price

        human_price_elasticity = -1.5  # Example elasticity value
        base_demand = 100  # Base demand for products
        demand = base_demand * relative_price ** human_price_elasticity

        agent_price_elasticity = -2.0  # Example elasticity value for agents
        agent_base_demand = 150  # Base demand for agents
        agent_demand = agent_base_demand * relative_price ** agent_price_elasticity

        return demand + agent_demand, agent_demand

    def compute_interaction_features(self, interaction_data: np.ndarray) -> dict:
        """Summarise interaction records into aggregate features.

        Args:
            interaction_data: Anything ``pandas.DataFrame`` accepts; records are
                expected to carry ``'action'`` and ``'price'`` fields.

        Returns:
            A dict with ``'mean_sale_price'``: the mean price over rows whose
            action is ``'purchase'``, or NaN when there are no such rows or the
            data has no ``'action'``/``'price'`` columns (e.g. it is empty).
        """
        df = pd.DataFrame(interaction_data)
        # Empty or schema-less data has no 'action'/'price' columns; indexing
        # them would raise KeyError. Report NaN instead, which is also what
        # pandas yields for a frame that simply contains no purchases.
        if 'action' not in df.columns or 'price' not in df.columns:
            mean_sale_price = np.nan
        else:
            mean_sale_price = df.loc[df['action'] == 'purchase', 'price'].mean()
        return {
            'mean_sale_price': mean_sale_price,
        }

    def run_pricing_simulation(self, prices: np.ndarray) -> np.ndarray:
        """Run one simulation step for ``prices`` and record it in the history.

        Args:
            prices: Per-product prices. Must broadcast against the demand
                estimates, which have length ``product_catelogue_size``.

        Returns:
            The interaction data generated for this step, as an array.
        """
        # Simulate demand based on prices
        observed_demand, demand_from_agents = self.setup_true_demand(prices)
        # Human-only demand: strip the agent-generated part from what is observed.
        true_demand = observed_demand - demand_from_agents

        interaction_data = self.get_interaction_data()
        interaction_features = self.compute_interaction_features(interaction_data)
        demand_estimates = self.demand_estimate(interaction_data)

        # Relative estimation error; the small epsilon avoids division by zero.
        internal_error = np.abs(true_demand - demand_estimates) / (true_demand + 1e-6)

        self.simulation_history.append(
            {
                'prices': prices,
                'true_demand': true_demand,
                'demand_estimates': demand_estimates,
                'internal_error': internal_error,
                'interaction_data': interaction_data,
                'interaction_features': interaction_features
            })
        return np.array(interaction_data)

    def get_interaction_data(self) -> np.ndarray:
        """Return simulated interaction data (currently a placeholder: always empty)."""
        # Simulate interaction data
        interaction_data = []
        return np.array(interaction_data)

    def demand_estimate(self, interactions: np.ndarray) -> np.ndarray:
        """Return per-product demand estimates.

        Placeholder implementation: ``interactions`` is ignored and the
        estimates are uniform random values in ``[0, 100)``, one per product.
        """
        demand_estimates = np.random.rand(self.product_catelogue_size) * 100  # Dummy demand estimates
        return demand_estimates
 class PHANTOMEnv(gym.Env):
    def __init__(self):
        super(PHANTOMEnv, self).__init__()
        self.constraints = BusinessLogicConstraints()
        self.action_space = spaces.Box(
            low=-self.constraints.max_price_adjustment, high=self.constraints.max_price_adjustment,
-            shape=(1,), dtype=np.float32) #  we allow teh learner to adjust price by some BusinessLogicConstraints factor
+            shape=(self.constraints.product_catelogue_size,), dtype=np.float32) #  we allow teh learner to adjust price by some BusinessLogicConstraints factor
        # Example for using image as input:
        self.commerce_platform = CommercePlatform(
            product_catelogue_size=self.constraints.product_catelogue_size,
            max_price=self.constraints.system_max_price,
            min_price=self.constraints.system_min_price
        )
        self.observation_space = spaces.Dict({
            'elasticity': spaces.Dict({
                'price': spaces.Box(low=0, high=self.constraints.system_max_price,
@@ -29,24 +102,23 @@ class PHANTOMEnv(gym.Env):
            })
        })
-    def reset(self, seed=None, options=None):
+    def reset(self, seed :int, options) -> tuple[dict, dict]:
        super().reset(seed=seed)
        # Initialize state
        self.state = {
-            'price': 100.0,  # base price
+            'elasticity': {
-            'demand': 0.0
+                'price': np.full((self.constraints.product_catelogue_size,), 100.0, dtype=np.float32),
                'demand': np.full((self.constraints.product_catelogue_size,), 50.0, dtype=np.float32)
            }
        }
        return self.state, {}
    def step(self, action):
-        # Apply action
+        self.state['price'] = np.clip(self.state['price'] * (1 + action),
-        price_adjustment = action[0]
+                            self.constraints.system_min_price,
-        new_price = self.state['price'] * (1 + price_adjustment)
+                            self.constraints.system_max_price)
-        self.state['price'] = new_price
+
        # Simulate demand based on new price
        demand = self.simulate_demand(new_price)
        self.state['demand'] = demand
        # Calculate reward (e.g., revenue)
        reward = new_price * demand