Airflow addition (#28)

* introducing airflow to run pipeline

* chore: updating dag with upload to registry

* introducing complete provider (non refactored and noisy)

* chore: removing old shit

* generic pricing baselines

* feature: super simple model registry (to be updated maybe third party OS software)

* chore: refactoring the providers docker config and requirements

* chore: refactored and broke down components (breaking)

* exporting all

* local pipeline execution working

* fix: fixing import structures from nonrelativistic

* chore: enables cross comm pickling with fully e2e pipeline compilation

* docs: what the pipeline is like now

* pipelines local running and pipeline high level definition

* cleaning old pipeline and vectorization

* leaked but fixing, not so important

* test: started with pipeline step testing

* chore: cleaning up provider of prices

* test: extra tests with semantic meaning checks

* migrating pricers

* feature: introducing pricing predictors (pricers)

* chore: e2e is done with new pipeline

* extra session feature extraction

* feature: experimental session pricer and metrics (vibe)

* chore: redefined and connected pricers (#29)
This commit is contained in:
Daniel Alves Rösel
2025-11-29 17:50:16 +01:00
committed by GitHub
parent 2a0e44ab24
commit ad9423bf59
49 changed files with 3642 additions and 619 deletions

View File

@@ -0,0 +1,13 @@
from procesing.pricers.base import PricingFunction
from procesing.pricers.elasticity import ElasticityBasedPricer
from procesing.pricers.simple import StaticPricer, RandomPricer
from procesing.pricers.session_aware import SessionAwarePricer, ProductSpecificSessionPricer
# Public API of the pricers package: the abstract interface first, then the
# concrete pricer implementations re-exported from their submodules.
__all__ = [
    "PricingFunction",
    "ElasticityBasedPricer",
    "StaticPricer",
    "RandomPricer",
    "SessionAwarePricer",
    "ProductSpecificSessionPricer",
]

View File

@@ -0,0 +1,70 @@
from abc import ABC, abstractmethod
from typing import Optional, Dict, Any, List
import numpy as np
import pandas as pd
class PricingFunction(ABC):
    """
    Common interface every pricer implements.

    A pricing function is the mapping

        f(Q_t, P_t, S_t, H_t) -> P_{t+1}

    with
        Q_t in R^n : demand vector at time t
        P_t in R^n : price vector at time t
        S_t        : session features (behavioral signals, interactions)
        H_t        : historical trajectory {Q_{t-k}, P_{t-k}, S_{t-k}}

    Objective:
        maximize   E[R_T] = E[sum_t P_t^T . Q_t]
        subject to Q_t = g(P_t, S_t)   (demand response via elasticity)
                   P_t >= C            (cost floor)
        minimize   L_agent = R_oracle - R_observed

    Subclasses must implement ``fit`` and ``predict``; ``update``,
    ``get_params`` and ``set_params`` are optional hooks whose defaults
    do nothing.
    """

    @abstractmethod
    def fit(self, historical_data: pd.DataFrame, **kwargs):
        """
        Train offline from historical observations.

        Args:
            historical_data: DataFrame with elasticity, prices, demand signals
            **kwargs: additional training parameters
        """

    @abstractmethod
    def predict(self, state_space) -> np.ndarray:
        """
        Produce the next price vector for the given state.

        Args:
            state_space: StateSpace object containing Q_t, P_t, S_t, H_t

        Returns:
            P_{t+1}: price vector in R^n
        """

    def update(self, observation: Dict[str, Any]):
        """
        Optional online-learning hook; the base implementation is a no-op.

        Args:
            observation: dict with {state, action, reward, next_state}
                - state: StateSpace before pricing decision
                - action: prices shown (P_t)
                - reward: revenue/conversion signal
                - next_state: StateSpace after user interaction
        """
        return None  # default: no online learning

    def get_params(self) -> Dict[str, Any]:
        """Return the parameters to serialize; empty by default."""
        return dict()

    def set_params(self, params: Dict[str, Any]):
        """Restore parameters from a dict; the base implementation ignores it."""
        return None

View File

@@ -0,0 +1,59 @@
import numpy as np
import pandas as pd
from procesing.pricers.base import PricingFunction
class ElasticityBasedPricer(PricingFunction):
    """
    Pricing based on demand elasticity estimates.

    Implemented rule:

        price = base_price * (1 + alpha * |elasticity| * demand_deviation)
        demand_deviation = (demand - mean_demand) / (mean_demand + 1e-6)

    followed by clipping to [price_floor, price_ceil].

    The multiplier uses the absolute value of the elasticity, so for the same
    relative demand deviation a product with larger |elasticity| moves
    further away from its base price.
    NOTE(review): earlier comments on this class described the opposite
    intent ("elastic goods should move less"). The formula is left as
    implemented; confirm which direction is wanted.
    """

    # Fallback values used by fit() when the optional columns are missing.
    DEFAULT_BASE_PRICE = 100
    DEFAULT_MEAN_DEMAND = 10

    def __init__(self, alpha: float = 0.1, price_floor: float = 0.0, price_ceil: float = np.inf):
        """
        Args:
            alpha: sensitivity of the price multiplier to the demand deviation
            price_floor: lower clip bound applied to predicted prices
            price_ceil: upper clip bound applied to predicted prices
        """
        self.alpha = alpha
        self.price_floor = price_floor
        self.price_ceil = price_ceil
        # fitted parameters, populated by fit()
        self.elasticity = None
        self.base_prices = None
        self.mean_demand = None

    def fit(self, historical_data: pd.DataFrame, **kwargs):
        """
        Calibrate from historical elasticity estimates.

        Expects columns: [productId, elasticity, base_price, mean_demand].
        Only 'elasticity' is required; a missing 'base_price' falls back to
        DEFAULT_BASE_PRICE and a missing 'mean_demand' to DEFAULT_MEAN_DEMAND
        for every row.

        Args:
            historical_data: one row per product
            **kwargs: accepted for compatibility with PricingFunction.fit;
                currently ignored

        Returns:
            self, to allow call chaining

        Raises:
            ValueError: if the 'elasticity' column is absent
        """
        if 'elasticity' not in historical_data.columns:
            raise ValueError("historical_data must contain 'elasticity' column")

        n_rows = len(historical_data)
        self.elasticity = historical_data['elasticity'].values
        self.base_prices = (historical_data['base_price'].values
                            if 'base_price' in historical_data.columns
                            else np.ones(n_rows) * self.DEFAULT_BASE_PRICE)
        self.mean_demand = (historical_data['mean_demand'].values
                            if 'mean_demand' in historical_data.columns
                            else np.ones(n_rows) * self.DEFAULT_MEAN_DEMAND)
        return self

    def predict(self, state_space) -> np.ndarray:
        """
        Adjust base prices by the relative demand deviation.

        Demand above the fitted mean raises the price and demand below it
        lowers the price; the size of the move scales with alpha and
        |elasticity|.

        Args:
            state_space: object exposing a ``demand`` vector with one entry
                per fitted product

        Returns:
            Price vector clipped to [price_floor, price_ceil]

        Raises:
            ValueError: if fit() has not been called, or if the demand vector
                length differs from the number of fitted products
        """
        if self.elasticity is None:
            raise ValueError("Must call fit() before predict()")

        demand = np.asarray(state_space.demand)
        if len(demand) != len(self.elasticity):
            raise ValueError(f"Demand vector size {len(demand)} != elasticity size {len(self.elasticity)}")

        # relative deviation from mean demand; the epsilon keeps the division
        # finite when a product's mean demand is zero
        demand_dev = (demand - self.mean_demand) / (self.mean_demand + 1e-6)

        # multiplier grows with |elasticity| for above-mean demand
        price_multiplier = 1 + self.alpha * np.abs(self.elasticity) * demand_dev
        prices = self.base_prices * price_multiplier

        # enforce bounds
        return np.clip(prices, self.price_floor, self.price_ceil)

View File

@@ -0,0 +1,172 @@
"""
Session-aware pricing functions that leverage behavioral features S_t.
These pricers aim to minimize L_agent = R_oracle - R_observed.
"""
import numpy as np
import pandas as pd
from procesing.pricers.base import PricingFunction
from procesing.pricers.elasticity import ElasticityBasedPricer
class SessionAwarePricer(PricingFunction):
    """
    Extends elasticity-based pricing with session behavioral signals.

        f(Q, P, S) = base_price * elasticity_factor * session_factor

    Where session_factor adjusts for:
        - interaction_velocity (agent detection proxy)
        - product_view_depth (interest signal)
        - cart_to_view_ratio (conversion intent)

    Strategy: charge higher prices to suspected agents (high velocity)
    to recover oracle revenue from reconnaissance sessions.

    Only the first row of ``state_space.session_features`` is read, and the
    resulting factor is applied uniformly to every product.
    """

    def __init__(self,
                 alpha: float = 0.1,
                 beta_velocity: float = 0.05,
                 beta_attention: float = 0.03,
                 agent_velocity_threshold: float = 5.0,
                 agent_markup: float = 1.2,
                 price_floor: float = 0.0,
                 price_ceil: float = np.inf,
                 cart_ratio_threshold: float = 0.1,
                 cart_boost: float = 0.02):
        """
        Args:
            alpha: elasticity sensitivity
            beta_velocity: interaction velocity weight
                (stored, but not used by predict() at present)
            beta_attention: product attention weight
            agent_velocity_threshold: velocity above which to apply agent markup
            agent_markup: price multiplier for suspected agent sessions
            price_floor, price_ceil: price bounds
            cart_ratio_threshold: cart_to_view_ratio above which the cart
                boost is applied
            cart_boost: relative price increase applied when the cart ratio
                exceeds the threshold
        """
        self.alpha = alpha
        self.beta_velocity = beta_velocity
        self.beta_attention = beta_attention
        self.agent_velocity_threshold = agent_velocity_threshold
        self.agent_markup = agent_markup
        self.price_floor = price_floor
        self.price_ceil = price_ceil
        self.cart_ratio_threshold = cart_ratio_threshold
        self.cart_boost = cart_boost
        # fitted parameters
        self.elasticity = None
        self.base_prices = None
        self.mean_demand = None

    def fit(self, historical_data: pd.DataFrame, **kwargs):
        """
        Calibrate from historical elasticity data.

        Only 'elasticity' is required; a missing 'base_price' column falls
        back to 100 and a missing 'mean_demand' column to 10 for every row.

        Returns:
            self, to allow call chaining

        Raises:
            ValueError: if the 'elasticity' column is absent
        """
        if 'elasticity' not in historical_data.columns:
            raise ValueError("historical_data must contain 'elasticity'")

        n_rows = len(historical_data)
        self.elasticity = historical_data['elasticity'].values
        self.base_prices = (historical_data['base_price'].values
                            if 'base_price' in historical_data.columns
                            else np.ones(n_rows) * 100)
        self.mean_demand = (historical_data['mean_demand'].values
                            if 'mean_demand' in historical_data.columns
                            else np.ones(n_rows) * 10)
        return self

    def predict(self, state_space) -> np.ndarray:
        """
        Generate prices with session awareness.

        Args:
            state_space: object exposing ``demand`` (one entry per fitted
                product) and optionally ``session_features`` (a DataFrame)

        Returns:
            Price vector clipped to [price_floor, price_ceil]

        Raises:
            ValueError: if fit() has not been called, or if the demand vector
                length differs from the number of fitted products
        """
        if self.elasticity is None:
            raise ValueError("Must call fit() before predict()")

        demand = np.asarray(state_space.demand)
        # Reject mismatched sizes explicitly; a length-1 demand would
        # otherwise broadcast silently against the fitted arrays.
        if len(demand) != len(self.elasticity):
            raise ValueError(f"Demand vector size {len(demand)} != elasticity size {len(self.elasticity)}")

        # base elasticity-driven pricing
        demand_dev = (demand - self.mean_demand) / (self.mean_demand + 1e-6)
        elasticity_factor = 1 + self.alpha * np.abs(self.elasticity) * demand_dev

        # A state without session features is priced on elasticity alone.
        session_factor = self._session_factor(getattr(state_space, 'session_features', None))

        prices = self.base_prices * elasticity_factor * session_factor
        return np.clip(prices, self.price_floor, self.price_ceil)

    def _session_factor(self, session_features) -> float:
        """Return the multiplicative session adjustment (1.0 when no session data)."""
        if session_features is None or session_features.empty:
            return 1.0

        sf = session_features.iloc[0]  # single session features
        factor = 1.0

        # agent detection via velocity: suspected agents get the markup
        velocity = sf.get('interaction_velocity', 0.0)
        if velocity > self.agent_velocity_threshold:
            factor *= self.agent_markup

        # attention signal: deeper product views raise the price logarithmically
        view_depth = sf.get('product_view_depth', 0)
        if view_depth > 0:
            factor *= 1 + self.beta_attention * np.log1p(view_depth)

        # cart presence: small boost for conversion intent
        cart_to_view = sf.get('cart_to_view_ratio', 0.0)
        if cart_to_view > self.cart_ratio_threshold:
            factor *= 1 + self.cart_boost

        return factor
class ProductSpecificSessionPricer(PricingFunction):
    """
    Session-aware pricer with product-specific demand signals.

    Intended design: use S_t to extract per-product interaction counts and
    raise prices for products the user has already viewed/hovered
    (products viewed multiple times = high interest -> price up).

    Current state: the per-product adjustment is not implemented yet, so
    predict() returns plain elasticity-driven prices and ``view_boost`` /
    ``max_view_boost`` are stored but not applied.
    """

    def __init__(self,
                 alpha: float = 0.1,
                 view_boost: float = 0.02,
                 max_view_boost: float = 0.15,
                 price_floor: float = 0.0,
                 price_ceil: float = np.inf):
        """
        Args:
            alpha: elasticity sensitivity
            view_boost: stored for the planned per-view price increase
                (not applied yet)
            max_view_boost: stored for the planned cap on the cumulative
                view boost (not applied yet)
            price_floor, price_ceil: price bounds
        """
        self.alpha = alpha
        self.view_boost = view_boost
        self.max_view_boost = max_view_boost
        self.price_floor = price_floor
        self.price_ceil = price_ceil
        # fitted parameters
        self.elasticity = None
        self.base_prices = None
        self.mean_demand = None
        self.product_ids = None

    def fit(self, historical_data: pd.DataFrame, **kwargs):
        """
        Calibrate from historical elasticity data keyed by product.

        Requires 'elasticity' and 'productId'; a missing 'base_price' column
        falls back to 100 and a missing 'mean_demand' column to 10.

        Returns:
            self, to allow call chaining

        Raises:
            ValueError: if 'elasticity' or 'productId' is absent
        """
        if 'elasticity' not in historical_data.columns or 'productId' not in historical_data.columns:
            raise ValueError("historical_data must contain 'elasticity' and 'productId'")

        n_rows = len(historical_data)
        self.elasticity = historical_data['elasticity'].values
        self.base_prices = (historical_data['base_price'].values
                            if 'base_price' in historical_data.columns
                            else np.ones(n_rows) * 100)
        self.mean_demand = (historical_data['mean_demand'].values
                            if 'mean_demand' in historical_data.columns
                            else np.ones(n_rows) * 10)
        self.product_ids = historical_data['productId'].values
        return self

    def predict(self, state_space) -> np.ndarray:
        """
        Return elasticity-driven prices clipped to the configured bounds.

        Args:
            state_space: object exposing a ``demand`` vector with one entry
                per fitted product

        Raises:
            ValueError: if fit() has not been called, or if the demand vector
                length differs from the number of fitted products
        """
        if self.elasticity is None:
            raise ValueError("Must call fit() before predict()")

        demand = np.asarray(state_space.demand)
        if len(demand) != len(self.elasticity):
            raise ValueError(f"Demand vector size {len(demand)} != elasticity size {len(self.elasticity)}")

        # base pricing
        demand_dev = (demand - self.mean_demand) / (self.mean_demand + 1e-6)
        base_prices = self.base_prices * (1 + self.alpha * np.abs(self.elasticity) * demand_dev)

        # TODO: extend session feature extraction to include product-specific
        # counts, then apply view_boost (capped at max_view_boost) to the
        # products present in the session. Until then no session adjustment
        # is made.

        return np.clip(base_prices, self.price_floor, self.price_ceil)

View File

@@ -0,0 +1,48 @@
import numpy as np
import pandas as pd
from procesing.pricers.base import PricingFunction
class StaticPricer(PricingFunction):
    """Static pricing: always return fixed base prices."""

    def __init__(self, base_prices: np.ndarray = None):
        """
        Args:
            base_prices: optional fixed price vector; when omitted, fit()
                must be called before predict()
        """
        self.base_prices = base_prices

    def fit(self, historical_data: pd.DataFrame, **kwargs):
        """
        Extract base prices from historical data.

        Uses the 'base_price' column when present, otherwise 'price'.

        Args:
            historical_data: one row per product
            **kwargs: accepted for compatibility with PricingFunction.fit;
                currently ignored

        Returns:
            self, to allow call chaining

        Raises:
            ValueError: if neither 'base_price' nor 'price' is present
        """
        if 'base_price' in historical_data.columns:
            self.base_prices = historical_data['base_price'].values
        elif 'price' in historical_data.columns:
            self.base_prices = historical_data['price'].values
        else:
            raise ValueError("historical_data must contain 'base_price' or 'price' column")
        return self

    def predict(self, state_space) -> np.ndarray:
        """
        Return the static base prices regardless of state.

        The result is always a fresh ndarray, so callers may mutate it freely
        and prices supplied as a plain sequence are handled as well.

        Raises:
            ValueError: if no base prices were provided or fitted
        """
        if self.base_prices is None:
            raise ValueError("Must call fit() or provide base_prices in constructor")
        # np.array copies by default, matching the original .copy() for arrays
        return np.array(self.base_prices)
class RandomPricer(PricingFunction):
    """Random pricing within bounds (for baseline comparison)."""

    def __init__(self, price_min: float = 50.0, price_max: float = 500.0, seed: int = None):
        """
        Args:
            price_min: lower bound of the uniform price draw
            price_max: upper bound of the uniform price draw
            seed: seed for the internal random generator; None gives
                non-reproducible draws
        """
        self.price_min = price_min
        self.price_max = price_max
        self.seed = seed
        self.n_products = None
        self.rng = np.random.default_rng(seed)

    def fit(self, historical_data: pd.DataFrame, **kwargs):
        """
        Learn the number of products from the row count of historical_data.

        Args:
            historical_data: one row per product
            **kwargs: accepted for compatibility with PricingFunction.fit;
                currently ignored

        Returns:
            self, to allow call chaining
        """
        self.n_products = len(historical_data)
        return self

    def predict(self, state_space) -> np.ndarray:
        """
        Draw one uniform random price per product.

        If fit() was never called, the product count is taken from
        ``state_space.demand`` on first use and remembered for later calls.
        """
        if self.n_products is None:
            self.n_products = len(state_space.demand)
        return self.rng.uniform(self.price_min, self.price_max, size=self.n_products)