Airflow addition (#28)

* introducing airflow to run pipeline * chore: updating dag with upload to registry * introducing complete provider (non refactored and noisy) * chore: removing old shit * generic pricing baselines * feature: super simple model registry (to be updated maybe third party OS software) * chore: refactoring the providers docker config and requirements * chore: refactored and broke down components (breaking) * exporting all * local pipeline execution working * fix: fixing import structures from nonrelativistic * chore: enables cross comm pickling with fully e2e pipeline compilation * docs: what the pipeline is like now * pipelines local running and pipeline high level definition * cleaning old pipeline and vectorization * leaked but fixing, not so important * test: started with pipeline step testing * chore: cleaning up provider of prices * test: extra tests with semantic meaning checks * migrating pricers * feature: introducing pricing predictors (pricers) * chore: e2e is done with new pipeline * extra session feature extraction * feature: experimental session pricer and metrics (vibe) * chore: redefined and connected pricers (#29)
2026-07-16 01:53:37 +00:00 · 2025-11-29 17:50:16 +01:00
parent 2a0e44ab24
commit ad9423bf59
49 changed files with 3642 additions and 619 deletions
--- a/experiments/procesing/steps/pricing.py
+++ b/experiments/procesing/steps/pricing.py
@@ -0,0 +1,149 @@
+import numpy as np
+import pandas as pd
+from typing import Optional, List, Dict, Any
+from dataclasses import dataclass, field
+from procesing.steps.base import BaseContextStep
+from procesing.pricers import ElasticityBasedPricer
+
+@dataclass
+class StateSpace:
+    """
+    State representation for pricing functions.
+
+    Components:
+        Q_t: demand ∈ R^n (current demand signal per product)
+        P_t: prices ∈ R^n (current/base prices)
+        S_t: session_features (behavioral signals, interaction data)
+        H_t: history = {Q_{t-k}, P_{t-k}, S_{t-k}} for k in [1, history_length]
+
+    Additionally stores:
+        - product_ids: product identifiers (n,)
+        - elasticity: price elasticity per product (n,)
+        - metadata: arbitrary context (experiment_id, timestamp, etc.)
+
+    Raises:
+        AssertionError: on construction, when prices, elasticity or
+            product_ids do not have the same length as demand.
+    """
+    demand: np.ndarray  # Q_t ∈ R^n
+    prices: np.ndarray  # P_t ∈ R^n
+    session_features: pd.DataFrame = field(default_factory=pd.DataFrame)  # S_t
+
+    # augmented state components
+    product_ids: Optional[np.ndarray] = None
+    elasticity: Optional[np.ndarray] = None
+
+    # historical trajectory H_t = {(Q_{t-k}, P_{t-k}, S_{t-k})}, oldest entry first
+    history: List[Dict[str, Any]] = field(default_factory=list)
+
+    # metadata for context
+    metadata: Dict[str, Any] = field(default_factory=dict)
+
+    def __post_init__(self):
+        """Validate that every per-product vector has the same length as demand."""
+        # NOTE: asserts are skipped when Python runs with -O; they are kept
+        # so the exception type seen by existing callers does not change.
+        n = len(self.demand)
+        assert len(self.prices) == n, "demand and prices must have same dimension"
+        if self.elasticity is not None:
+            assert len(self.elasticity) == n, "elasticity must match dimension"
+        if self.product_ids is not None:
+            assert len(self.product_ids) == n, "product_ids must match dimension"
+
+    @property
+    def n_products(self) -> int:
+        """Number of products in state space."""
+        return len(self.demand)
+
+    def add_history(self, q: np.ndarray, p: np.ndarray, s: pd.DataFrame, max_length: int = 10):
+        """
+        Append a historical state to trajectory H_t and trim it to max_length.
+
+        Args:
+            q: demand vector of the state being recorded
+            p: price vector of the state being recorded
+            s: session features of the state being recorded
+            max_length: maximum number of entries kept; oldest entries are dropped first
+        """
+        self.history.append({'demand': q, 'prices': p, 'session_features': s})
+        # Drop every surplus entry, not just one, so that a max_length smaller
+        # than on earlier calls still leaves at most max_length entries.
+        overflow = len(self.history) - max_length
+        if overflow > 0:
+            del self.history[:overflow]
+
+    def get_history_window(self, k: int = 5) -> List[Dict[str, Any]]:
+        """
+        Retrieve the last k historical states, oldest first.
+
+        Always returns a new list. Fewer than k entries are returned when the
+        history is shorter than k; an empty list is returned when k <= 0.
+        """
+        if k <= 0:
+            # history[-0:] would be the whole history, not an empty window
+            return []
+        # slicing already copes with k > len(history) and yields a copy
+        return self.history[-k:]
+
+
+class BuildStateSpaceStep(BaseContextStep):
+    """
+    Assemble a StateSpace from the context products plus elasticity and demand data.
+
+    Input: elasticity_df [productId, elasticity, ...], optional demand_df [productId, demand, ...]
+    Output: StateSpace instance with Q_t, P_t, elasticity, product_ids
+    """
+
+    def transform(self, elasticity_df: pd.DataFrame, demand_df: Optional[pd.DataFrame] = None):
+        """
+        Left-join elasticity (and demand, when given) onto self.context.products.
+
+        Missing elasticity, base price and demand values are filled with 0.0.
+        When demand_df is None or has no 'demand' column, every row gets a
+        demand of 10.0.
+        """
+        catalog = self.context.products.copy()
+
+        # base price is read from the per-product 'metadata' dict; a value
+        # that is not a dict, or a dict without the key, counts as price 0
+        if 'metadata' not in catalog.columns:
+            catalog['base_price'] = 0
+        else:
+            def _base_price(meta):
+                if isinstance(meta, dict):
+                    return meta.get('base_price', 0)
+                return 0
+
+            catalog['base_price'] = catalog['metadata'].apply(_base_price)
+
+        # attach elasticity, keyed on productId
+        price_frame = catalog[['id', 'base_price']].rename(columns={'id': 'productId'})
+        elasticity_frame = elasticity_df[['productId', 'elasticity']]
+        merged = price_frame.merge(elasticity_frame, on='productId', how='left')
+        merged = merged.fillna({'elasticity': 0.0, 'base_price': 0.0})
+
+        # attach demand when it is available, else use a uniform default
+        has_demand = demand_df is not None and 'demand' in demand_df.columns
+        if has_demand:
+            demand_frame = demand_df[['productId', 'demand']]
+            merged = merged.merge(demand_frame, on='productId', how='left')
+            merged = merged.fillna({'demand': 0.0})
+            demand_vector = merged['demand'].values
+        else:
+            demand_vector = 10.0 * np.ones(len(merged))
+
+        return StateSpace(
+            demand=demand_vector,
+            prices=merged['base_price'].values,
+            session_features=pd.DataFrame(),
+            product_ids=merged['productId'].values,
+            elasticity=merged['elasticity'].values,
+            metadata={'timestamp': pd.Timestamp.now().isoformat()},
+        )
+
+
+class FitPricingFunctionStep(BaseContextStep):
+    """
+    Instantiate a pricing function and fit it on elasticity data.
+    Input: elasticity_df
+    Output: fitted pricing function instance
+    """
+
+    def transform(self, elasticity_df: pd.DataFrame):
+        """
+        Build the pricer named in the context config and fit it.
+
+        Config keys read from self.context.config:
+            pricing_function_class: class to instantiate (default ElasticityBasedPricer)
+            pricing_function_params: keyword arguments for its constructor (default {})
+        """
+        config = self.context.config
+        pricer_cls = config.get('pricing_function_class', ElasticityBasedPricer)
+        pricer_kwargs = config.get('pricing_function_params', {})
+
+        fitted = pricer_cls(**pricer_kwargs)
+        fitted.fit(elasticity_df)
+        return fitted
+
+
+class PredictPricesStep(BaseContextStep):
+    """
+    Predict optimal prices using fitted pricing function.
+    Input: (pricer, state_space)
+    Output: prices_df [productId, predicted_price]
+    """
+
+    def transform(self, data: tuple):
+        """
+        Run pricer.predict(state_space) and label each prediction with a product id.
+
+        Args:
+            data: (pricer, state_space) pair; pricer must expose predict(state_space)
+
+        Returns:
+            DataFrame with columns productId and predicted_price.
+        """
+        pricer, state_space = data
+
+        predicted_prices = pricer.predict(state_space)
+
+        # Prefer the ids carried by the state space the prediction was made
+        # from: they stay aligned with it even when its rows differ from
+        # self.context.products (e.g. duplicated keys in an upstream merge).
+        # NOTE(review): presumably predict() returns one value per state-space
+        # entry; confirm against the pricer implementations.
+        product_ids = getattr(state_space, 'product_ids', None)
+        n_predicted = len(predicted_prices) if np.ndim(predicted_prices) > 0 else None
+        if product_ids is None or n_predicted is None or len(product_ids) != n_predicted:
+            # no usable ids on the state space: fall back to the context products
+            product_ids = self.context.products['id'].values
+
+        return pd.DataFrame({
+            'productId': product_ids,
+            'predicted_price': predicted_prices
+        })