Airflow addition (#28)

* introducing airflow to run pipeline * chore: updating dag with upload to registry * introducing complete provider (non-refactored and noisy) * chore: removing old shit * generic pricing baselines * feature: super simple model registry (to be updated maybe third party OS software) * chore: refactoring the providers docker config and requirements * chore: refactored and broke down components (breaking) * exporting all * local pipeline execution working * fix: fixing import structures from nonrelativistic * chore: enables cross comm pickling with fully e2e pipeline compilation * docs: what the pipeline is like now * pipelines local running and pipeline high level definition * cleaning old pipeline and vectorization * leaked but fixing, not so important * test: started with pipeline step testing * chore: cleaning up provider of prices * test: extra tests with semantic meaning checks * migrating pricers * feature: introducing pricing predictors (pricers) * chore: e2e is done with new pipeline * extra session feature extraction * feature: experimental session pricer and metrics (vibe) * chore: redefined and connected pricers (#29)
2026-07-16 01:53:37 +00:00 · 2025-11-29 17:50:16 +01:00
parent 2a0e44ab24
commit ad9423bf59
49 changed files with 3642 additions and 619 deletions
--- a/experiments/procesing/pricers/__init__.py
+++ b/experiments/procesing/pricers/__init__.py
@@ -0,0 +1,13 @@
+from procesing.pricers.base import PricingFunction
+from procesing.pricers.elasticity import ElasticityBasedPricer
+from procesing.pricers.simple import StaticPricer, RandomPricer
+from procesing.pricers.session_aware import SessionAwarePricer, ProductSpecificSessionPricer
+
+# Public API of the pricers package, re-exported from its submodules.
+__all__ = [
+    "PricingFunction",
+    "ElasticityBasedPricer",
+    "StaticPricer",
+    "RandomPricer",
+    "SessionAwarePricer",
+    "ProductSpecificSessionPricer",
+]
--- a/experiments/procesing/pricers/base.py
+++ b/experiments/procesing/pricers/base.py
@@ -0,0 +1,70 @@
+from abc import ABC, abstractmethod
+from typing import Optional, Dict, Any, List
+import numpy as np
+import pandas as pd
+
+
+class PricingFunction(ABC):
+    """
+    Common interface implemented by every pricing strategy.
+
+    A pricing function is the mapping f(Q_t, P_t, S_t, H_t) -> P_{t+1}
+
+    Where:
+        Q_t ∈ R^n: demand vector at time t
+        P_t ∈ R^n: price vector at time t
+        S_t: session features (behavioral signals, interactions)
+        H_t = {Q_{t-k}, P_{t-k}, S_{t-k}}: historical state trajectory
+
+    Objective:
+        maximize E[R_T] = E[Σ P_t^T · Q_t]
+        subject to:
+            Q_t = g(P_t, S_t)  (demand response via elasticity)
+            P_t ≥ C  (cost floor)
+            minimize L_agent = R_oracle - R_observed
+
+    Subclasses must implement ``fit`` and ``predict``. ``update``,
+    ``get_params`` and ``set_params`` are optional hooks whose defaults
+    do nothing.
+    """
+
+    @abstractmethod
+    def fit(self, historical_data: pd.DataFrame, **kwargs):
+        """
+        Train the pricer offline.
+
+        Args:
+            historical_data: DataFrame with elasticity, prices, demand signals
+            **kwargs: additional training parameters
+        """
+
+    @abstractmethod
+    def predict(self, state_space) -> np.ndarray:
+        """
+        Produce the next price vector for the given state.
+
+        Args:
+            state_space: StateSpace object containing Q_t, P_t, S_t, H_t
+
+        Returns:
+            P_{t+1}: price vector in R^n
+        """
+
+    def update(self, observation: Dict[str, Any]) -> None:
+        """
+        Optional online-learning hook; the default ignores the observation.
+
+        Args:
+            observation: dict with {state, action, reward, next_state}
+                - state: StateSpace before pricing decision
+                - action: prices shown (P_t)
+                - reward: revenue/conversion signal
+                - next_state: StateSpace after user interaction
+        """
+        return None
+
+    def get_params(self) -> Dict[str, Any]:
+        """Return the parameters to serialize; the default has none."""
+        return dict()
+
+    def set_params(self, params: Dict[str, Any]) -> None:
+        """Restore parameters from a dict; the default ignores ``params``."""
+        return None
--- a/experiments/procesing/pricers/elasticity.py
+++ b/experiments/procesing/pricers/elasticity.py
@@ -0,0 +1,59 @@
+import numpy as np
+import pandas as pd
+from procesing.pricers.base import PricingFunction
+
+
+class ElasticityBasedPricer(PricingFunction):
+    """
+    Pricing based on demand elasticity estimates.
+
+    f(Q) = base_price * (1 + alpha * |elasticity| * demand_deviation)
+
+    where demand_deviation = (Q - mean_demand) / mean_demand. The adjustment
+    is scaled by the magnitude of the elasticity, so for the same demand
+    deviation a product with larger |elasticity| moves further from its
+    base price.
+    """
+
+    def __init__(self, alpha: float = 0.1, price_floor: float = 0.0, price_ceil: float = np.inf):
+        """
+        Args:
+            alpha: sensitivity of the price to the elasticity-weighted
+                demand deviation
+            price_floor: lower bound applied to every predicted price
+            price_ceil: upper bound applied to every predicted price
+        """
+        self.alpha = alpha
+        self.price_floor = price_floor
+        self.price_ceil = price_ceil
+        # fitted parameters, populated by fit()
+        self.elasticity = None
+        self.base_prices = None
+        self.mean_demand = None
+
+    def fit(self, historical_data: pd.DataFrame, **kwargs):
+        """
+        Calibrate from historical elasticity estimates.
+        Expects: [productId, elasticity, base_price, mean_demand]
+
+        Args:
+            historical_data: one row per product. 'elasticity' is required;
+                'base_price' defaults to 100 and 'mean_demand' to 10 when
+                the column is absent.
+            **kwargs: accepted for compatibility with PricingFunction.fit;
+                ignored.
+
+        Returns:
+            self, to allow chaining.
+
+        Raises:
+            ValueError: if the 'elasticity' column is missing.
+        """
+        if 'elasticity' not in historical_data.columns:
+            raise ValueError("historical_data must contain 'elasticity' column")
+
+        n_rows = len(historical_data)
+        # copy so later edits to the caller's DataFrame cannot change the fit
+        self.elasticity = historical_data['elasticity'].to_numpy(dtype=float, copy=True)
+        if 'base_price' in historical_data.columns:
+            self.base_prices = historical_data['base_price'].to_numpy(dtype=float, copy=True)
+        else:
+            self.base_prices = np.full(n_rows, 100.0)
+        if 'mean_demand' in historical_data.columns:
+            self.mean_demand = historical_data['mean_demand'].to_numpy(dtype=float, copy=True)
+        else:
+            self.mean_demand = np.full(n_rows, 10.0)
+        return self
+
+    def predict(self, state_space) -> np.ndarray:
+        """
+        Adjust prices based on demand deviation and elasticity.
+        Demand above the mean raises the price, demand below it lowers the
+        price; the size of the move grows with |elasticity|.
+
+        Args:
+            state_space: object exposing ``demand``, one entry per fitted
+                product.
+
+        Returns:
+            Price vector clipped to [price_floor, price_ceil].
+
+        Raises:
+            ValueError: if fit() has not been called, or if the demand
+                vector length differs from the fitted elasticity length.
+        """
+        if self.elasticity is None:
+            raise ValueError("Must call fit() before predict()")
+
+        demand = np.asarray(state_space.demand)
+        if len(demand) != len(self.elasticity):
+            raise ValueError(f"Demand vector size {len(demand)} != elasticity size {len(self.elasticity)}")
+
+        # relative deviation from mean demand; epsilon guards a zero mean
+        demand_dev = (demand - self.mean_demand) / (self.mean_demand + 1e-6)
+
+        # the deviation is weighted by |elasticity|, so the sign of the
+        # elasticity estimate does not flip the direction of the adjustment
+        price_multiplier = 1 + self.alpha * np.abs(self.elasticity) * demand_dev
+        prices = self.base_prices * price_multiplier
+
+        # enforce bounds
+        return np.clip(prices, self.price_floor, self.price_ceil)
--- a/experiments/procesing/pricers/session_aware.py
+++ b/experiments/procesing/pricers/session_aware.py
@@ -0,0 +1,172 @@
+"""
+Session-aware pricing functions that leverage behavioral features S_t.
+These pricers aim to minimize L_agent = R_oracle - R_observed.
+"""
+import numpy as np
+import pandas as pd
+from procesing.pricers.base import PricingFunction
+from procesing.pricers.elasticity import ElasticityBasedPricer
+
+
+class SessionAwarePricer(PricingFunction):
+    """
+    Extends elasticity-based pricing with session behavioral signals.
+
+    f(Q, P, S) = base_price * elasticity_factor * session_factor
+
+    Where session_factor combines three multiplicative adjustments read from
+    the first row of ``state_space.session_features``:
+        - interaction_velocity (agent detection proxy): ``agent_markup`` is
+          applied when it exceeds ``agent_velocity_threshold``
+        - product_view_depth (interest signal): boost of
+          ``1 + beta_attention * log1p(view_depth)`` when positive
+        - cart_to_view_ratio (conversion intent): boost of ``1 + cart_boost``
+          when it exceeds ``cart_ratio_threshold``
+
+    Strategy: charge higher prices to suspected agents (high velocity)
+    to recover oracle revenue from reconnaissance sessions.
+    """
+
+    def __init__(self,
+                 alpha: float = 0.1,
+                 beta_velocity: float = 0.05,
+                 beta_attention: float = 0.03,
+                 agent_velocity_threshold: float = 5.0,
+                 agent_markup: float = 1.2,
+                 price_floor: float = 0.0,
+                 price_ceil: float = np.inf,
+                 cart_ratio_threshold: float = 0.1,
+                 cart_boost: float = 0.02):
+        """
+        Args:
+            alpha: elasticity sensitivity
+            beta_velocity: interaction velocity weight (stored, but not
+                currently used by predict())
+            beta_attention: product attention weight
+            agent_velocity_threshold: velocity above which to apply agent markup
+            agent_markup: price multiplier for suspected agent sessions
+            price_floor, price_ceil: price bounds
+            cart_ratio_threshold: cart_to_view_ratio above which the
+                conversion-intent boost is applied
+            cart_boost: relative price increase for conversion intent
+        """
+        self.alpha = alpha
+        self.beta_velocity = beta_velocity
+        self.beta_attention = beta_attention
+        self.agent_velocity_threshold = agent_velocity_threshold
+        self.agent_markup = agent_markup
+        self.price_floor = price_floor
+        self.price_ceil = price_ceil
+        self.cart_ratio_threshold = cart_ratio_threshold
+        self.cart_boost = cart_boost
+
+        # fitted parameters
+        self.elasticity = None
+        self.base_prices = None
+        self.mean_demand = None
+
+    def fit(self, historical_data: pd.DataFrame, **kwargs):
+        """
+        Calibrate from historical elasticity data.
+
+        Args:
+            historical_data: one row per product. 'elasticity' is required;
+                'base_price' defaults to 100 and 'mean_demand' to 10 when
+                the column is absent.
+            **kwargs: ignored.
+
+        Returns:
+            self, to allow chaining.
+
+        Raises:
+            ValueError: if the 'elasticity' column is missing.
+        """
+        if 'elasticity' not in historical_data.columns:
+            raise ValueError("historical_data must contain 'elasticity'")
+
+        self.elasticity = historical_data['elasticity'].values
+        self.base_prices = (historical_data['base_price'].values
+                           if 'base_price' in historical_data.columns
+                           else np.ones(len(historical_data)) * 100)
+        self.mean_demand = (historical_data['mean_demand'].values
+                           if 'mean_demand' in historical_data.columns
+                           else np.ones(len(historical_data)) * 10)
+        return self
+
+    def predict(self, state_space) -> np.ndarray:
+        """
+        Generate prices with session awareness.
+
+        Args:
+            state_space: object exposing ``demand`` (one entry per fitted
+                product) and optionally ``session_features`` (DataFrame;
+                only its first row is read).
+
+        Returns:
+            Price vector clipped to [price_floor, price_ceil].
+
+        Raises:
+            ValueError: if fit() has not been called, or if the demand
+                vector length differs from the fitted elasticity length.
+        """
+        if self.elasticity is None:
+            raise ValueError("Must call fit() before predict()")
+
+        demand = np.asarray(state_space.demand)
+        n_products = len(demand)
+        if n_products != len(self.elasticity):
+            raise ValueError(f"Demand vector size {n_products} != elasticity size {len(self.elasticity)}")
+
+        # base elasticity-driven pricing
+        demand_dev = (demand - self.mean_demand) / (self.mean_demand + 1e-6)
+        elasticity_factor = 1 + self.alpha * np.abs(self.elasticity) * demand_dev
+
+        # session-aware adjustments (neutral when no session data is present)
+        session_factor = np.ones(n_products)
+
+        session_features = getattr(state_space, 'session_features', None)
+        if session_features is not None and not session_features.empty:
+            sf = session_features.iloc[0]  # single session features
+
+            # agent detection via velocity
+            velocity = sf.get('interaction_velocity', 0.0)
+            if velocity > self.agent_velocity_threshold:
+                # suspected agent: apply markup to recover oracle revenue
+                session_factor *= self.agent_markup
+
+            # attention signal: higher view depth -> user interested -> can charge more
+            view_depth = sf.get('product_view_depth', 0)
+            if view_depth > 0:
+                session_factor *= 1 + self.beta_attention * np.log1p(view_depth)
+
+            # cart presence: small boost for conversion intent
+            cart_to_view = sf.get('cart_to_view_ratio', 0.0)
+            if cart_to_view > self.cart_ratio_threshold:
+                session_factor *= (1 + self.cart_boost)
+
+        prices = self.base_prices * elasticity_factor * session_factor
+        return np.clip(prices, self.price_floor, self.price_ceil)
+
+
+class ProductSpecificSessionPricer(PricingFunction):
+    """
+    Session-aware pricer intended to use product-specific demand signals.
+
+    Planned strategy: products viewed multiple times = high interest -> price up,
+    using per-product interaction counts extracted from S_t.
+
+    NOTE: the per-product adjustment is not implemented yet because session
+    features do not carry per-product counts. ``view_boost`` and
+    ``max_view_boost`` are stored but unused, and predict() currently
+    returns plain elasticity-based prices.
+    """
+
+    def __init__(self,
+                 alpha: float = 0.1,
+                 view_boost: float = 0.02,
+                 max_view_boost: float = 0.15,
+                 price_floor: float = 0.0,
+                 price_ceil: float = np.inf):
+        """
+        Args:
+            alpha: elasticity sensitivity
+            view_boost: reserved for the per-view price increase (unused)
+            max_view_boost: reserved cap on the total view boost (unused)
+            price_floor, price_ceil: price bounds
+        """
+        self.alpha = alpha
+        self.view_boost = view_boost
+        self.max_view_boost = max_view_boost
+        self.price_floor = price_floor
+        self.price_ceil = price_ceil
+
+        # fitted parameters
+        self.elasticity = None
+        self.base_prices = None
+        self.mean_demand = None
+        self.product_ids = None
+
+    def fit(self, historical_data: pd.DataFrame, **kwargs):
+        """
+        Calibrate from historical elasticity data.
+
+        Args:
+            historical_data: one row per product. 'elasticity' and
+                'productId' are required; 'base_price' defaults to 100 and
+                'mean_demand' to 10 when the column is absent.
+            **kwargs: ignored.
+
+        Returns:
+            self, to allow chaining.
+
+        Raises:
+            ValueError: if 'elasticity' or 'productId' is missing.
+        """
+        if 'elasticity' not in historical_data.columns or 'productId' not in historical_data.columns:
+            raise ValueError("historical_data must contain 'elasticity' and 'productId'")
+
+        self.elasticity = historical_data['elasticity'].values
+        self.base_prices = (historical_data['base_price'].values
+                           if 'base_price' in historical_data.columns
+                           else np.ones(len(historical_data)) * 100)
+        self.mean_demand = (historical_data['mean_demand'].values
+                           if 'mean_demand' in historical_data.columns
+                           else np.ones(len(historical_data)) * 10)
+        self.product_ids = historical_data['productId'].values
+        return self
+
+    def predict(self, state_space) -> np.ndarray:
+        """
+        Generate elasticity-based prices clipped to the configured bounds.
+
+        Args:
+            state_space: object exposing ``demand``, one entry per fitted
+                product.
+
+        Returns:
+            Price vector clipped to [price_floor, price_ceil].
+
+        Raises:
+            ValueError: if fit() has not been called, or if the demand
+                vector length differs from the fitted elasticity length.
+        """
+        if self.elasticity is None:
+            raise ValueError("Must call fit() before predict()")
+
+        demand = np.asarray(state_space.demand)
+        if len(demand) != len(self.elasticity):
+            raise ValueError(f"Demand vector size {len(demand)} != elasticity size {len(self.elasticity)}")
+
+        # base pricing
+        demand_dev = (demand - self.mean_demand) / (self.mean_demand + 1e-6)
+        base_prices = self.base_prices * (1 + self.alpha * np.abs(self.elasticity) * demand_dev)
+
+        # TODO: extend session feature extraction to include product-specific
+        # counts, then apply view_boost (capped at max_view_boost) per product.
+        # Until then no session adjustment is applied.
+
+        return np.clip(base_prices, self.price_floor, self.price_ceil)
--- a/experiments/procesing/pricers/simple.py
+++ b/experiments/procesing/pricers/simple.py
@@ -0,0 +1,48 @@
+import numpy as np
+import pandas as pd
+from procesing.pricers.base import PricingFunction
+
+
+class StaticPricer(PricingFunction):
+    """Static pricing: always return fixed base prices."""
+
+    def __init__(self, base_prices: np.ndarray = None):
+        """
+        Args:
+            base_prices: optional fixed price vector; when omitted, fit()
+                must be called before predict().
+        """
+        self.base_prices = base_prices
+
+    def fit(self, historical_data: pd.DataFrame, **kwargs):
+        """
+        Extract base prices from historical data.
+
+        Args:
+            historical_data: DataFrame with a 'base_price' column, or a
+                'price' column as fallback.
+            **kwargs: accepted for compatibility with PricingFunction.fit;
+                ignored.
+
+        Returns:
+            self, to allow chaining.
+
+        Raises:
+            ValueError: if neither 'base_price' nor 'price' is present.
+        """
+        if 'base_price' in historical_data.columns:
+            self.base_prices = historical_data['base_price'].values
+        elif 'price' in historical_data.columns:
+            self.base_prices = historical_data['price'].values
+        else:
+            raise ValueError("historical_data must contain 'base_price' or 'price' column")
+        return self
+
+    def predict(self, state_space) -> np.ndarray:
+        """
+        Return static base prices regardless of state.
+
+        Returns:
+            A fresh ndarray copy of the base prices, so callers cannot
+            mutate the pricer's state.
+
+        Raises:
+            ValueError: if no base prices were provided or fitted.
+        """
+        if self.base_prices is None:
+            raise ValueError("Must call fit() or provide base_prices in constructor")
+        # np.array (not .copy()) so a list passed to the constructor still
+        # yields the ndarray promised by the return annotation
+        return np.array(self.base_prices, copy=True)
+
+
+class RandomPricer(PricingFunction):
+    """Random pricing within bounds (for baseline comparison)."""
+
+    def __init__(self, price_min: float = 50.0, price_max: float = 500.0, seed: int = None):
+        """
+        Args:
+            price_min: lower bound of the uniform price range
+            price_max: upper bound of the uniform price range
+            seed: seed for the random generator; None gives fresh entropy
+        """
+        self.price_min = price_min
+        self.price_max = price_max
+        self.seed = seed
+        self.n_products = None
+        self.rng = np.random.default_rng(seed)
+
+    def fit(self, historical_data: pd.DataFrame, **kwargs):
+        """
+        Learn number of products (one per row of ``historical_data``).
+
+        Args:
+            historical_data: DataFrame with one row per product.
+            **kwargs: accepted for compatibility with PricingFunction.fit;
+                ignored.
+
+        Returns:
+            self, to allow chaining.
+        """
+        self.n_products = len(historical_data)
+        return self
+
+    def predict(self, state_space) -> np.ndarray:
+        """
+        Generate random prices.
+
+        Args:
+            state_space: object exposing ``demand``; only its length is
+                used, and only when fit() has not been called.
+
+        Returns:
+            Vector of prices drawn uniformly between price_min and price_max.
+        """
+        n_products = self.n_products
+        if n_products is None:
+            # unfitted: size from the current state without caching it, so
+            # a later state with a different product count is sized correctly
+            n_products = len(state_space.demand)
+        return self.rng.uniform(self.price_min, self.price_max, size=n_products)