Airflow addition (#28)

* introducing airflow to run pipeline

* chore: updating dag with upload to registry

* introducing complete provider (non refactored and noisy)

* chore: removing old shit

* generic pricing baselines

* feature: super simple model registry (to be updated maybe third party OS software)

* chore: refactoring the providers docker config and requirements

* chore: refactored and broke down components (breaking)

* exporting all

* local pipeline execution working

* fix: fixing import structures from nonrelativistic

* chore: enables cross comm pickling with fully e2e pipeline compilation

* docs: what the pipeline is like now

* pipelines local running and pipeline high level definition

* cleaning old pipeline and vectorization

* leaked but fixing, not so important

* test: started with pipeline step testing

* chore: cleaning up provider of prices

* test: extra tests with semantic meaning checks

* migrating pricers

* feature: introducing pricing predictors (pricers)

* chore: e2e is done with new pipeline

* extra session feature extraction

* feature: experimental session pricer and metrics (vibe)

* chore: redefined and connected pricers (#29)
This commit is contained in:
Daniel Alves Rösel
2025-11-29 17:50:16 +01:00
committed by GitHub
parent 2a0e44ab24
commit ad9423bf59
49 changed files with 3642 additions and 619 deletions

View File

@@ -0,0 +1,172 @@
"""
Session-aware pricing functions that leverage behavioral features S_t.
These pricers aim to minimize L_agent = R_oracle - R_observed.
"""
import numpy as np
import pandas as pd
from procesing.pricers.base import PricingFunction
from procesing.pricers.elasticity import ElasticityBasedPricer
class SessionAwarePricer(PricingFunction):
    """
    Extends elasticity-based pricing with session behavioral signals.

    f(Q, P, S) = base_price * elasticity_factor * session_factor

    Where session_factor is a single scalar (applied to every product)
    built from the first row of ``state_space.session_features``:
    - interaction_velocity (agent detection proxy)
    - product_view_depth (interest signal)
    - cart_to_view_ratio (conversion intent)

    Strategy: charge higher prices to suspected agents (high velocity)
    to recover oracle revenue from reconnaissance sessions.
    """

    def __init__(self,
                 alpha: float = 0.1,
                 beta_velocity: float = 0.05,
                 beta_attention: float = 0.03,
                 agent_velocity_threshold: float = 5.0,
                 agent_markup: float = 1.2,
                 price_floor: float = 0.0,
                 price_ceil: float = np.inf,
                 cart_ratio_threshold: float = 0.1,
                 cart_boost: float = 0.02):
        """
        Args:
            alpha: elasticity sensitivity
            beta_velocity: interaction velocity weight. NOTE: stored for
                configuration purposes but not applied by predict(); the
                velocity signal only acts through the threshold/markup pair.
            beta_attention: product attention weight (scales log1p(view depth))
            agent_velocity_threshold: velocity above which to apply agent markup
            agent_markup: price multiplier for suspected agent sessions
            price_floor, price_ceil: price bounds
            cart_ratio_threshold: cart_to_view_ratio above which the
                conversion-intent boost is applied
            cart_boost: relative price increase applied when the cart ratio
                exceeds ``cart_ratio_threshold`` (0.02 -> +2%)
        """
        self.alpha = alpha
        self.beta_velocity = beta_velocity
        self.beta_attention = beta_attention
        self.agent_velocity_threshold = agent_velocity_threshold
        self.agent_markup = agent_markup
        self.price_floor = price_floor
        self.price_ceil = price_ceil
        self.cart_ratio_threshold = cart_ratio_threshold
        self.cart_boost = cart_boost

        # fitted parameters (populated by fit())
        self.elasticity = None
        self.base_prices = None
        self.mean_demand = None

    def fit(self, historical_data: pd.DataFrame, **kwargs):
        """
        Calibrate from historical elasticity data.

        Args:
            historical_data: frame with a required 'elasticity' column and
                optional 'base_price' / 'mean_demand' columns. Missing
                optional columns fall back to constants (100 and 10).

        Returns:
            self, to allow call chaining.

        Raises:
            ValueError: if 'elasticity' is not a column of historical_data.
        """
        if 'elasticity' not in historical_data.columns:
            raise ValueError("historical_data must contain 'elasticity'")

        n_rows = len(historical_data)
        self.elasticity = historical_data['elasticity'].values
        self.base_prices = (historical_data['base_price'].values
                            if 'base_price' in historical_data.columns
                            else np.ones(n_rows) * 100)
        self.mean_demand = (historical_data['mean_demand'].values
                            if 'mean_demand' in historical_data.columns
                            else np.ones(n_rows) * 10)
        return self

    def _session_multiplier(self, state_space) -> float:
        """Return the scalar price multiplier derived from session features."""
        session_features = getattr(state_space, 'session_features', None)
        # No session information at all -> neutral multiplier.
        if session_features is None or session_features.empty:
            return 1.0

        sf = session_features.iloc[0]  # single session features
        multiplier = 1.0

        # agent detection via velocity
        velocity = sf.get('interaction_velocity', 0.0)
        if velocity > self.agent_velocity_threshold:
            # suspected agent: apply markup to recover oracle revenue
            multiplier *= self.agent_markup

        # attention signal: higher view depth -> user interested -> can charge more
        view_depth = sf.get('product_view_depth', 0)
        if view_depth > 0:
            multiplier *= 1 + self.beta_attention * np.log1p(view_depth)

        # cart presence: if user has items in cart, slightly increase prices
        cart_to_view = sf.get('cart_to_view_ratio', 0.0)
        if cart_to_view > self.cart_ratio_threshold:
            multiplier *= (1 + self.cart_boost)  # small boost for conversion intent

        return multiplier

    def predict(self, state_space) -> np.ndarray:
        """
        Generate prices with session awareness.

        Args:
            state_space: object exposing ``demand`` (array-like, one entry per
                product) and, optionally, ``session_features`` (a DataFrame
                whose first row describes the current session).

        Returns:
            Prices clipped to [price_floor, price_ceil].

        Raises:
            ValueError: if fit() has not been called.
        """
        if self.elasticity is None:
            raise ValueError("Must call fit() before predict()")

        demand = np.asarray(state_space.demand)
        n_products = len(demand)

        # base elasticity-driven pricing; the epsilon guards against a zero mean
        demand_dev = (demand - self.mean_demand) / (self.mean_demand + 1e-6)
        elasticity_factor = 1 + self.alpha * np.abs(self.elasticity) * demand_dev

        # session-aware adjustment, identical for every product
        session_factor = np.ones(n_products) * self._session_multiplier(state_space)

        prices = self.base_prices * elasticity_factor * session_factor
        return np.clip(prices, self.price_floor, self.price_ceil)
class ProductSpecificSessionPricer(PricingFunction):
    """
    Session-aware pricer intended to use product-specific demand signals.

    Intended strategy: use S_t to extract per-product interaction counts and
    price up products the user has already viewed/hovered multiple times.

    NOTE: the per-product session adjustment is not implemented yet.
    predict() currently returns the clipped elasticity-driven prices only;
    ``view_boost``, ``max_view_boost`` and the fitted ``product_ids`` are
    stored but not applied.
    """

    def __init__(self,
                 alpha: float = 0.1,
                 view_boost: float = 0.02,
                 max_view_boost: float = 0.15,
                 price_floor: float = 0.0,
                 price_ceil: float = np.inf):
        """
        Args:
            alpha: elasticity sensitivity
            view_boost: reserved for the per-view price boost (not applied yet)
            max_view_boost: reserved cap for the total view boost (not applied yet)
            price_floor, price_ceil: price bounds
        """
        self.alpha = alpha
        self.view_boost = view_boost
        self.max_view_boost = max_view_boost
        self.price_floor = price_floor
        self.price_ceil = price_ceil

        # fitted parameters (populated by fit())
        self.elasticity = None
        self.base_prices = None
        self.mean_demand = None
        self.product_ids = None

    def fit(self, historical_data: pd.DataFrame, **kwargs):
        """
        Calibrate from historical elasticity data.

        Args:
            historical_data: frame with required 'elasticity' and 'productId'
                columns and optional 'base_price' / 'mean_demand' columns.
                Missing optional columns fall back to constants (100 and 10).

        Returns:
            self, to allow call chaining.

        Raises:
            ValueError: if 'elasticity' or 'productId' is missing.
        """
        columns = historical_data.columns
        if 'elasticity' not in columns or 'productId' not in columns:
            raise ValueError("historical_data must contain 'elasticity' and 'productId'")

        n_rows = len(historical_data)
        self.elasticity = historical_data['elasticity'].values
        self.base_prices = (historical_data['base_price'].values
                            if 'base_price' in columns
                            else np.ones(n_rows) * 100)
        self.mean_demand = (historical_data['mean_demand'].values
                            if 'mean_demand' in columns
                            else np.ones(n_rows) * 10)
        self.product_ids = historical_data['productId'].values
        return self

    def predict(self, state_space) -> np.ndarray:
        """
        Generate elasticity-driven prices clipped to the configured bounds.

        Args:
            state_space: object exposing ``demand`` (array-like, one entry per
                product).

        Returns:
            Prices clipped to [price_floor, price_ceil].

        Raises:
            ValueError: if fit() has not been called.
        """
        if self.elasticity is None:
            raise ValueError("Must call fit() before predict()")

        demand = np.asarray(state_space.demand)

        # base pricing; the epsilon guards against a zero mean
        demand_dev = (demand - self.mean_demand) / (self.mean_demand + 1e-6)
        base_prices = self.base_prices * (1 + self.alpha * np.abs(self.elasticity) * demand_dev)

        # TODO: extend session feature extraction to include product-specific
        # counts, then apply view_boost (capped at max_view_boost) to products
        # the session has interacted with. Until then no session adjustment
        # is made, so session features are deliberately not read here.

        return np.clip(base_prices, self.price_floor, self.price_ceil)