Airflow addition (#28)

* introducing airflow to run pipeline

* chore: updating dag with upload to registry

* introducing complete provider (non refactored and noisy)

* chore: removing old shit

* generic pricing baselines

* feature: super simple model registry (to be updated maybe third party OS software)

* chore: refactoring the providers docker config and requirements

* chore: refactored and broke down components (breaking)

* exporting all

* local pipeline execution working

* fix: fixing import structures from nonrelativistic

* chore: enables cross comm pickling with fully e2e pipeline compilation

* docs: what the pipeline is like now

* pipelines local running and pipeline high level definition

* cleaning old pipeline and vectorization

* leaked but fixing, not so important

* test: started with pipeline step testing

* chore: cleaning up provider of prices

* test: extra tests with semantic meaning checks

* migrating pricers

* feature: introducing pricing predictors (pricers)

* chore: e2e is done with new pipeline

* extra session feature extraction

* feature: experimental session pricer and metrics (vibe)

* chore: redefined and connected pricers (#29)
This commit is contained in:
Daniel Alves Rösel
2025-11-29 17:50:16 +01:00
committed by GitHub
parent 2a0e44ab24
commit ad9423bf59
49 changed files with 3642 additions and 619 deletions

View File

@@ -0,0 +1,149 @@
import numpy as np
import pandas as pd
from typing import Optional, List, Dict, Any
from dataclasses import dataclass, field
from procesing.steps.base import BaseContextStep
from procesing.pricers import ElasticityBasedPricer
@dataclass
class StateSpace:
    """
    State representation for pricing functions.

    Components:
        Q_t: demand ∈ R^n (current demand signal per product)
        P_t: prices ∈ R^n (current/base prices)
        S_t: session_features (behavioral signals, interaction data)
        H_t: history = {Q_{t-k}, P_{t-k}, S_{t-k}} for k in [1, history_length]

    Additionally stores:
        - product_ids: product identifiers (n,)
        - elasticity: price elasticity per product (n,)
        - metadata: arbitrary context (experiment_id, timestamp, etc.)
    """

    demand: np.ndarray  # Q_t ∈ R^n
    prices: np.ndarray  # P_t ∈ R^n
    session_features: pd.DataFrame = field(default_factory=pd.DataFrame)  # S_t
    # augmented state components
    product_ids: Optional[np.ndarray] = None
    elasticity: Optional[np.ndarray] = None
    # historical trajectory H_t = {(Q_{t-k}, P_{t-k}, S_{t-k})}, oldest entry first
    history: List[Dict[str, Any]] = field(default_factory=list)
    # metadata for context
    metadata: Dict[str, Any] = field(default_factory=dict)

    def __post_init__(self):
        """Validate that all per-product vectors share the length of ``demand``.

        Raises:
            AssertionError: if ``prices``, ``elasticity`` or ``product_ids``
                has a different length than ``demand``.
        """
        n = len(self.demand)
        # Raised explicitly rather than via `assert` so the checks still run
        # under `python -O`; the exception type is kept as AssertionError so
        # existing callers see the same error as before.
        if len(self.prices) != n:
            raise AssertionError("demand and prices must have same dimension")
        if self.elasticity is not None and len(self.elasticity) != n:
            raise AssertionError("elasticity must match dimension")
        if self.product_ids is not None and len(self.product_ids) != n:
            raise AssertionError("product_ids must match dimension")

    @property
    def n_products(self) -> int:
        """Number of products in state space."""
        return len(self.demand)

    def add_history(self, q: np.ndarray, p: np.ndarray, s: pd.DataFrame, max_length: int = 10):
        """Append a historical state to trajectory H_t and trim old entries.

        Args:
            q: demand vector of the historical state.
            p: price vector of the historical state.
            s: session features of the historical state.
            max_length: maximum number of entries kept in ``history``; a
                non-positive value keeps no history at all.
        """
        self.history.append({'demand': q, 'prices': p, 'session_features': s})
        # Drop every surplus entry from the front (oldest first), not just one,
        # so the bound also holds when max_length is smaller than on earlier calls.
        overflow = len(self.history) - max(max_length, 0)
        if overflow > 0:
            del self.history[:overflow]

    def get_history_window(self, k: int = 5) -> List[Dict[str, Any]]:
        """Retrieve the last k historical states (fewer if history is shorter).

        Args:
            k: number of most recent states requested.

        Returns:
            A new list with at most ``k`` entries, oldest first. Empty when
            ``k`` is not positive.
        """
        # Guard k <= 0 explicitly: history[-0:] would be the *entire* list.
        if k <= 0:
            return []
        # Slicing clamps to the available length and always returns a copy,
        # so callers never receive the live internal list.
        return self.history[-k:]
class BuildStateSpaceStep(BaseContextStep):
    """
    Assemble a StateSpace from the context's products plus elasticity/demand frames.

    Input: elasticity_df [productId, elasticity, ...], optional demand_df
    Output: StateSpace instance with Q_t, P_t, elasticity, product_ids
    """

    def transform(self, elasticity_df: pd.DataFrame, demand_df: Optional[pd.DataFrame] = None):
        """Build the state space for every product in ``self.context.products``.

        Args:
            elasticity_df: frame with at least ``productId`` and ``elasticity``.
            demand_df: optional frame with ``productId`` and ``demand``; it is
                ignored when it has no ``demand`` column.

        Returns:
            StateSpace with one entry per row of the merged product table.
        """
        catalog = self.context.products.copy()

        # base price lives inside the per-product metadata dict (0 when absent)
        if 'metadata' in catalog.columns:
            def _base_price_of(meta):
                if isinstance(meta, dict):
                    return meta.get('base_price', 0)
                return 0

            catalog['base_price'] = catalog['metadata'].apply(_base_price_of)
        else:
            catalog['base_price'] = 0

        # left-join elasticities onto the product list; missing values become 0.0
        price_table = catalog[['id', 'base_price']].rename(columns={'id': 'productId'})
        elasticity_table = elasticity_df[['productId', 'elasticity']]
        merged = price_table.merge(elasticity_table, on='productId', how='left')
        merged = merged.fillna({'elasticity': 0.0, 'base_price': 0.0})

        demand_available = demand_df is not None and 'demand' in demand_df.columns
        if demand_available:
            demand_table = demand_df[['productId', 'demand']]
            merged = merged.merge(demand_table, on='productId', how='left')
            merged = merged.fillna({'demand': 0.0})
            demand_vector = merged['demand'].values
        else:
            # no usable demand data: fall back to a constant demand of 10.0 per product
            demand_vector = np.full(len(merged), 10.0)

        return StateSpace(
            demand=demand_vector,
            prices=merged['base_price'].values,
            session_features=pd.DataFrame(),
            product_ids=merged['productId'].values,
            elasticity=merged['elasticity'].values,
            metadata={'timestamp': pd.Timestamp.now().isoformat()},
        )
class FitPricingFunctionStep(BaseContextStep):
    """
    Instantiate and fit the configured pricing function on elasticity data.

    Input: elasticity_df
    Output: fitted pricing function instance
    """

    def transform(self, elasticity_df: pd.DataFrame):
        """Create the pricer named in the context config and fit it.

        The class comes from config key ``pricing_function_class`` (default
        ``ElasticityBasedPricer``) and its constructor keyword arguments from
        ``pricing_function_params`` (default: none).

        Args:
            elasticity_df: frame passed unchanged to the pricer's ``fit``.

        Returns:
            The pricer instance after ``fit`` has been called on it.
        """
        config = self.context.config
        pricer_cls = config.get('pricing_function_class', ElasticityBasedPricer)
        pricer_kwargs = config.get('pricing_function_params', {})

        fitted = pricer_cls(**pricer_kwargs)
        fitted.fit(elasticity_df)
        return fitted
class PredictPricesStep(BaseContextStep):
    """
    Run a fitted pricing function over a state space and tabulate the result.

    Input: (pricer, state_space)
    Output: prices_df [productId, predicted_price]
    """

    def transform(self, data: tuple):
        """Predict prices and pair them with the context's product ids.

        Args:
            data: two-element tuple ``(pricer, state_space)``.

        Returns:
            DataFrame with columns ``productId`` (taken from
            ``self.context.products['id']``) and ``predicted_price`` (the
            output of ``pricer.predict(state_space)``).
        """
        fitted_pricer, state = data
        ids = self.context.products['id'].values
        prices = fitted_pricer.predict(state)

        columns = {'productId': ids, 'predicted_price': prices}
        return pd.DataFrame(columns)