mirror of
https://github.com/velocitatem/PHANTOM.git
synced 2026-05-31 08:33:36 +00:00
Merge branch 'agent-behavior-loader-developemen' into feat-strong-learning-implementation-with-data-contamination
This commit is contained in:
@@ -18,8 +18,6 @@ try:
|
||||
except ImportError:
|
||||
lib_make_state_repr = None
|
||||
lib_transition_histogram = None
|
||||
print("lib no includable")
|
||||
|
||||
|
||||
|
||||
class BehaviorModel:
|
||||
@@ -226,7 +224,7 @@ if __name__ == "__main__":
|
||||
|
||||
agent_model = AgentBehaviorModel(agent_dir)
|
||||
agent_mdp = agent_model.build_MDP()
|
||||
print(agent_mdp)
|
||||
|
||||
print(f"AGENT... Built MDP: {agent_mdp['num_states']} states, "
|
||||
f"{sum(len(t) for t in agent_mdp['transitions'].values())} transitions")
|
||||
if not agent_mdp['states']:
|
||||
@@ -235,8 +233,6 @@ if __name__ == "__main__":
|
||||
|
||||
human_evt = aggregate_event_transitions(human_mdp)
|
||||
agent_evt = aggregate_event_transitions(agent_mdp)
|
||||
print(agent_evt)
|
||||
|
||||
|
||||
common = set(human_evt.keys()) & set(agent_evt.keys())
|
||||
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
from os import kill
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Dict, Any
|
||||
from sim.rl.environment import BusinessLogicConstraints
|
||||
|
||||
|
||||
"""
|
||||
An engine by default should have its own demand estimation mechanism from the observed observations, which are the computed features.
|
||||
From these features we then follow the research structure of q -> p, with a mechanism that is testable and must be updatable.
|
||||
@@ -39,6 +39,7 @@ class BasePricingEngine(ABC):
|
||||
|
||||
|
||||
|
||||
|
||||
def reset(self):
|
||||
"""reset engine state for new episode"""
|
||||
self.step_count = 0
|
||||
@@ -68,15 +69,16 @@ class WildPricingEngine(BasePricingEngine):
|
||||
|
||||
def reset(self):
|
||||
super().reset()
|
||||
self.e_hat = np.full((self.c.product_catalogue_size,), -1.3, dtype=np.float32)
|
||||
self.mu_logp = np.zeros(self.c.product_catalogue_size, dtype=np.float32)
|
||||
self.mu_logq = np.zeros(self.c.product_catalogue_size, dtype=np.float32)
|
||||
self.cov_pq = np.zeros(self.c.product_catalogue_size, dtype=np.float32)
|
||||
self.var_p = np.ones(self.c.product_catalogue_size, dtype=np.float32)
|
||||
self.e_hat = np.full((self.c.product_catelogue_size,), -1.3, dtype=np.float32)
|
||||
self.mu_logp = np.zeros(self.c.product_catelogue_size, dtype=np.float32)
|
||||
self.mu_logq = np.zeros(self.c.product_catelogue_size, dtype=np.float32)
|
||||
self.cov_pq = np.zeros(self.c.product_catelogue_size, dtype=np.float32)
|
||||
self.var_p = np.ones(self.c.product_catelogue_size, dtype=np.float32)
|
||||
|
||||
def compute_prices(self, current_prices: np.ndarray, observation: Dict[str, Any]) -> np.ndarray:
|
||||
self.step_count += 1
|
||||
demand = _extract_demand(observation, self.c.product_catalogue_size)
|
||||
# extract demand signal (from env observation) as proxy for sales
|
||||
demand = observation.get('demand', np.zeros(self.c.product_catelogue_size, dtype=np.float32))
|
||||
return self._update_from_demand(current_prices, demand)
|
||||
|
||||
def _update_from_demand(self, prices: np.ndarray, sold: np.ndarray) -> np.ndarray:
|
||||
|
||||
Reference in New Issue
Block a user