Files
PHANTOM/experiments/procesing/steps/demand.py
Daniel Alves Rösel ad9423bf59 Airflow addition (#28)
* introducing airflow to run pipeline

* chore: updating dag with upload to registry

* introducing complete provider (non refactored and noisy)

* chore: removing old shit

* generic pricing baselines

* feature: super simple model registry (to be updated maybe third party OS software)

* chore: refactoring the providers docker config and requirements

* chore: refactored and broke down components (breaking)

* exporting all

* local pipeline execution working

* fix: fixing import structures from nonrelativistic

* chore: enables cross comm pickling with fully e2e pipeline compilation

* docs: what the pipeline is like now

* pipelines local running and pipeline high level definition

* cleaning old pipeline and vectorization

* leaked but fixing, not so important

* test: started with pipeline step testing

* chore: cleaning up provider of prices

* test: extra tests with semantic meaning checks

* migrating pricers

* feature: introducing pricing predictors (pricers)

* chore: e2e is done with new pipeline

* extra session feature extraction

* feature: experimental session pricer and metrics (vibe)

* chore: redefined and connected pricers (#29)
2025-11-29 17:50:16 +01:00

62 lines
2.2 KiB
Python
Executable File

import pandas as pd
from procesing.steps.base import BaseContextStep
class ComputeDemandStep(BaseContextStep):
    """
    Count interactions per product for one time window.

    Input: a chunk dict (interactions under 'data'; every other key is
    treated as window metadata) OR a raw interactions dataframe.
    Output: dataframe with columns [productId, demand_score], one row per
    unique id found in ``context.products``. When the chunk carried
    metadata, a dict of that metadata plus the dataframe under
    'demand_vector' is returned instead.
    """

    def transform(self, chunk):
        # separate the interaction payload from any metadata riding along
        if isinstance(chunk, dict):
            events = chunk['data']
            meta = {key: value for key, value in chunk.items() if key != 'data'}
        else:
            events, meta = chunk, {}

        catalog_ids = self.context.products['id'].unique()

        # optional equality filters; a falsy config value or a missing
        # column means the filter is skipped
        config = self.context.config
        active_filters = (
            ('sessionId', config.get('session_filter')),
            ('experimentId', config.get('experiment_filter')),
        )
        for column, wanted in active_filters:
            if wanted and column in events.columns:
                events = events[events[column] == wanted]

        # rows without a product cannot contribute to any product's demand
        product_events = events.dropna(subset=['productId'])

        if product_events.empty:
            # nothing to count: every known product gets a zero score
            demand = pd.DataFrame({'productId': catalog_ids, 'demand_score': 0})
        else:
            # crosstab against a constant column yields a per-product row count
            counts = pd.crosstab(product_events['productId'], 'count')
            # align to the full catalogue so unseen products show up as 0
            demand = counts.reindex(catalog_ids, fill_value=0).reset_index()
            demand.columns = ['productId', 'demand_score']

        # bare dataframe unless there is window metadata to carry forward
        if not meta:
            return demand
        return {**meta, 'demand_vector': demand}
class ComputeDemandForChunksStep(BaseContextStep):
    """Run ComputeDemandStep over each chunk of a list, keeping the order."""

    def transform(self, chunks: list):
        # falsy input short-circuits to an empty list before any step is built
        if chunks:
            # one step instance, sharing this step's context, serves all chunks
            per_chunk = ComputeDemandStep(self.context)
            return list(map(per_chunk.transform, chunks))
        return []