Merge branch 'agent-behavior-loader-developemen' into feat-strong-learning-implementation-with-data-contamination

2026-07-16 01:53:37 +00:00 · 2026-01-31 10:08:59 +01:00
parent 26abff5864 72877439ca
commit 2f481bd94b
25 changed files with 1205 additions and 117 deletions
--- a/sim/rl/engine.py
+++ b/sim/rl/engine.py
@@ -1,10 +1,10 @@
+from os import kill
 import numpy as np
 import pandas as pd
 from abc import ABC, abstractmethod
 from typing import Dict, Any
 from sim.rl.environment import BusinessLogicConstraints

-
 """
An engine by default should have its own demand estimation mechanism based on the observed observations, which are the computed features.
From these features we then follow the research structure of q -> p, with a mechanism that is testable and must be updatable.
@@ -39,6 +39,7 @@ class BasePricingEngine(ABC):



+
    def reset(self):
        """reset engine state for new episode"""
        self.step_count = 0
@@ -68,15 +69,16 @@ class WildPricingEngine(BasePricingEngine):

    def reset(self):
        super().reset()
-        self.e_hat = np.full((self.c.product_catalogue_size,), -1.3, dtype=np.float32)
-        self.mu_logp = np.zeros(self.c.product_catalogue_size, dtype=np.float32)
-        self.mu_logq = np.zeros(self.c.product_catalogue_size, dtype=np.float32)
-        self.cov_pq = np.zeros(self.c.product_catalogue_size, dtype=np.float32)
-        self.var_p = np.ones(self.c.product_catalogue_size, dtype=np.float32)
+        self.e_hat = np.full((self.c.product_catelogue_size,), -1.3, dtype=np.float32)
+        self.mu_logp = np.zeros(self.c.product_catelogue_size, dtype=np.float32)
+        self.mu_logq = np.zeros(self.c.product_catelogue_size, dtype=np.float32)
+        self.cov_pq = np.zeros(self.c.product_catelogue_size, dtype=np.float32)
+        self.var_p = np.ones(self.c.product_catelogue_size, dtype=np.float32)

    def compute_prices(self, current_prices: np.ndarray, observation: Dict[str, Any]) -> np.ndarray:
        self.step_count += 1
-        demand = _extract_demand(observation, self.c.product_catalogue_size)
+        # extract demand signal (from env observation) as proxy for sales
+        demand = observation.get('demand', np.zeros(self.c.product_catelogue_size, dtype=np.float32))
        return self._update_from_demand(current_prices, demand)

    def _update_from_demand(self, prices: np.ndarray, sold: np.ndarray) -> np.ndarray: