Airflow addition (#28)

* introducing airflow to run pipeline * chore: updating dag with upload to registry * introducing complete provider (non refactored and noisy) * chore: removing old shit * generic pricing baselines * feature: super simple model registry (to be updated maybe third party OS software) * chore: refactoring the providers docker config and requirements * chore: refactored and broke down components (breaking) * exporting all * local pipeline execution working * fix: fixing import structures from nonrelativistic * chore: enables cross comm pickling with fully e2e pipeline compilation * docs: what the pipeline is like now * pipelines local running and pipeline high level definition * cleaning old pipeline and vectorization * leaked but fixing, not so important * test: started with pipeline step testing * chore: cleaning up provider of prices * test: extra tests with semantic meaning checks * migrating pricers * feature: introducing pricing predictors (pricers) * chore: e2e is done with new pipeline * extra session feature extraction * feature: experimental session pricer and metrics (vibe) * chore: redefined and connected pricers (#29)
2026-07-16 01:53:37 +00:00 · 2025-11-29 17:50:16 +01:00
parent 2a0e44ab24
commit ad9423bf59
49 changed files with 3642 additions and 619 deletions
--- a/experiments/procesing/pricers/session_aware.py
+++ b/experiments/procesing/pricers/session_aware.py
@@ -0,0 +1,172 @@
+"""
+Session-aware pricing functions that leverage behavioral features S_t.
+These pricers aim to minimize L_agent = R_oracle - R_observed.
+"""
+import numpy as np
+import pandas as pd
+from procesing.pricers.base import PricingFunction
+from procesing.pricers.elasticity import ElasticityBasedPricer
+
+
+class SessionAwarePricer(PricingFunction):
+    """
+    Extends elasticity-based pricing with session behavioral signals.
+
+    f(Q, P, S) = base_price * elasticity_factor * session_factor
+
+    Where session_factor adjusts for:
+        - interaction_velocity (agent detection proxy)
+        - product_view_depth (interest signal)
+        - cart_to_view_ratio (conversion intent)
+
+    Strategy: charge higher prices to suspected agents (high velocity)
+    to recover oracle revenue from reconnaissance sessions.
+    """
+
+    def __init__(self,
+                 alpha: float = 0.1,
+                 beta_velocity: float = 0.05,
+                 beta_attention: float = 0.03,
+                 agent_velocity_threshold: float = 5.0,
+                 agent_markup: float = 1.2,
+                 price_floor: float = 0.0,
+                 price_ceil: float = np.inf):
+        """
+        Args:
+            alpha: elasticity sensitivity
+            beta_velocity: interaction velocity weight
+            beta_attention: product attention weight
+            agent_velocity_threshold: velocity above which to apply agent markup
+            agent_markup: price multiplier for suspected agent sessions
+            price_floor, price_ceil: price bounds
+        """
+        self.alpha = alpha
+        self.beta_velocity = beta_velocity
+        self.beta_attention = beta_attention
+        self.agent_velocity_threshold = agent_velocity_threshold
+        self.agent_markup = agent_markup
+        self.price_floor = price_floor
+        self.price_ceil = price_ceil
+
+        # fitted parameters
+        self.elasticity = None
+        self.base_prices = None
+        self.mean_demand = None
+
+    def fit(self, historical_data: pd.DataFrame, **kwargs):
+        """Calibrate from historical elasticity data."""
+        if 'elasticity' not in historical_data.columns:
+            raise ValueError("historical_data must contain 'elasticity'")
+
+        self.elasticity = historical_data['elasticity'].values
+        self.base_prices = (historical_data['base_price'].values
+                           if 'base_price' in historical_data.columns
+                           else np.ones(len(historical_data)) * 100)
+        self.mean_demand = (historical_data['mean_demand'].values
+                           if 'mean_demand' in historical_data.columns
+                           else np.ones(len(historical_data)) * 10)
+        return self
+
+    def predict(self, state_space) -> np.ndarray:
+        """Generate prices with session awareness."""
+        if self.elasticity is None:
+            raise ValueError("Must call fit() before predict()")
+
+        demand = np.asarray(state_space.demand)
+        n_products = len(demand)
+
+        # base elasticity-driven pricing
+        demand_dev = (demand - self.mean_demand) / (self.mean_demand + 1e-6)
+        elasticity_factor = 1 + self.alpha * np.abs(self.elasticity) * demand_dev
+
+        # session-aware adjustments
+        session_factor = np.ones(n_products)
+
+        if not state_space.session_features.empty:
+            sf = state_space.session_features.iloc[0]  # single session features
+
+            # agent detection via velocity
+            velocity = sf.get('interaction_velocity', 0.0)
+            if velocity > self.agent_velocity_threshold:
+                # suspected agent: apply markup to recover oracle revenue
+                session_factor *= self.agent_markup
+
+            # attention signal: higher view depth -> user interested -> can charge more
+            view_depth = sf.get('product_view_depth', 0)
+            if view_depth > 0:
+                attention_boost = 1 + self.beta_attention * np.log1p(view_depth)
+                session_factor *= attention_boost
+
+            # cart presence: if user has items in cart, slightly increase prices
+            cart_to_view = sf.get('cart_to_view_ratio', 0.0)
+            if cart_to_view > 0.1:
+                session_factor *= (1 + 0.02)  # small boost for conversion intent
+
+        prices = self.base_prices * elasticity_factor * session_factor
+        prices = np.clip(prices, self.price_floor, self.price_ceil)
+
+        return prices
+
+
+class ProductSpecificSessionPricer(PricingFunction):
+    """
+    Session-aware pricer with product-specific demand signals.
+
+    Uses S_t to extract per-product interaction counts and adjusts pricing
+    for products the user has already viewed/hovered.
+
+    Strategy: products viewed multiple times = high interest -> price up
+    """
+
+    def __init__(self,
+                 alpha: float = 0.1,
+                 view_boost: float = 0.02,
+                 max_view_boost: float = 0.15,
+                 price_floor: float = 0.0,
+                 price_ceil: float = np.inf):
+        self.alpha = alpha
+        self.view_boost = view_boost
+        self.max_view_boost = max_view_boost
+        self.price_floor = price_floor
+        self.price_ceil = price_ceil
+
+        self.elasticity = None
+        self.base_prices = None
+        self.mean_demand = None
+        self.product_ids = None
+
+    def fit(self, historical_data: pd.DataFrame, **kwargs):
+        if 'elasticity' not in historical_data.columns or 'productId' not in historical_data.columns:
+            raise ValueError("historical_data must contain 'elasticity' and 'productId'")
+
+        self.elasticity = historical_data['elasticity'].values
+        self.base_prices = (historical_data['base_price'].values
+                           if 'base_price' in historical_data.columns
+                           else np.ones(len(historical_data)) * 100)
+        self.mean_demand = (historical_data['mean_demand'].values
+                           if 'mean_demand' in historical_data.columns
+                           else np.ones(len(historical_data)) * 10)
+        self.product_ids = historical_data['productId'].values
+        return self
+
+    def predict(self, state_space) -> np.ndarray:
+        if self.elasticity is None:
+            raise ValueError("Must call fit() before predict()")
+
+        demand = np.asarray(state_space.demand)
+        n_products = len(demand)
+
+        # base pricing
+        demand_dev = (demand - self.mean_demand) / (self.mean_demand + 1e-6)
+        base_prices = self.base_prices * (1 + self.alpha * np.abs(self.elasticity) * demand_dev)
+
+        # product-specific session adjustments
+        if not state_space.session_features.empty and state_space.product_ids is not None:
+            # extract product interaction counts from session metadata
+            # (this would require session features to include per-product signals)
+            # for now, use uniform boost as placeholder
+            # TODO: extend session feature extraction to include product-specific counts
+            pass
+
+        prices = np.clip(base_prices, self.price_floor, self.price_ceil)
+        return prices