Files
PHANTOM/experiments/procesing/steps/fetch.py
Daniel Alves Rösel 8751583764 Improving interface after experiment01 (#30)
* fix: fixes of backwords

* fixing hotel information with image placeholders

* chore: clean up product display in hotel and cleaner interfacing

* adding loader with historical data loading

* feature: cleaning up pipeline

* chore: simple surge pricer

* created new pricing pipeline

* adding a checkout page to both sites

* fix: fixing stale pacakge

* test: we wont be using elasticity anymore so its okay

* chore: cleaning elasticity references

* chore: store sting

* feature: e2e intro pipline surge pricing

* fix: CVE vulnerability patching
2025-12-06 17:47:14 +01:00

74 lines
2.4 KiB
Python
Executable File

import pandas as pd
from procesing.steps.base import BaseContextStep
class FetchInteractionsStep(BaseContextStep):
"""Fetch raw interaction data from Kafka topic with optional time filtering"""
def __init__(self, context, lookback: str = None):
super().__init__(context)
self.lookback = lookback
def transform(self, X=None):
df = self.context.provider.fetch_kafka_topic('user-interactions')
if df.empty:
return df
# Explode metadata JSON column
if 'metadata' in df.columns:
df = df.join(
pd.json_normalize(df.pop('metadata'), sep='.').add_prefix('metadata_')
)
df = df.dropna(subset=['eventName'])
# drop all where page has /admin/
df = df[~df['page'].str.contains('/admin/', na=False)]
# Remap dateIndex if present
if 'metadata_dateIndex' in df.columns:
df['dateIndex'] = df['metadata_dateIndex'].astype('Int64')
# Apply time filtering if lookback specified
if self.lookback and 'ts' in df.columns:
df['ts'] = pd.to_datetime(df['ts'])
cutoff = pd.Timestamp.now() - pd.Timedelta(self.lookback)
df = df[df['ts'] >= cutoff]
return df
class FetchPriceLogsStep(BaseContextStep):
"""Fetch price log data from Kafka topic with optional time filtering"""
def __init__(self, context, lookback: str = None):
super().__init__(context)
self.lookback = lookback
def transform(self, X=None):
df = self.context.provider.fetch_kafka_topic('price-logs')
if df.empty:
return df
# Apply time filtering if lookback specified
if self.lookback and 'ts' in df.columns:
df['ts'] = pd.to_datetime(df['ts'])
cutoff = pd.Timestamp.now() - pd.Timedelta(self.lookback)
df = df[df['ts'] >= cutoff]
return df
class FetchExperimentsStep(BaseContextStep):
"""Fetch experiment metadata for given interaction data"""
def transform(self, interactions_df: pd.DataFrame):
if interactions_df.empty or 'experimentId' not in interactions_df.columns:
return pd.DataFrame()
exp_ids = interactions_df['experimentId'].dropna().unique().tolist()
if not exp_ids:
return pd.DataFrame()
return self.context.provider.fetch_experiments(exp_ids)