mirror of
https://github.com/velocitatem/PHANTOM.git
synced 2026-05-31 16:43:36 +00:00
chore: refactored and broke down components (braking
This commit is contained in:
46
experiments/procesing/steps/fetch.py
Executable file
46
experiments/procesing/steps/fetch.py
Executable file
@@ -0,0 +1,46 @@
|
||||
import pandas as pd
|
||||
from .base import BaseContextStep
|
||||
|
||||
class FetchInteractionsStep(BaseContextStep):
|
||||
"""Fetch raw interaction data from Kafka topic"""
|
||||
|
||||
def transform(self, X=None):
|
||||
df = self.context.provider.fetch_kafka_topic('user-interactions')
|
||||
|
||||
if df.empty:
|
||||
return df
|
||||
|
||||
# Explode metadata JSON column
|
||||
if 'metadata' in df.columns:
|
||||
df = df.join(
|
||||
pd.json_normalize(df.pop('metadata'), sep='.').add_prefix('metadata_')
|
||||
)
|
||||
|
||||
df = df.dropna(subset=['eventName'])
|
||||
|
||||
# Remap dateIndex if present
|
||||
if 'metadata_dateIndex' in df.columns:
|
||||
df['dateIndex'] = df['metadata_dateIndex'].astype('Int64')
|
||||
|
||||
return df
|
||||
|
||||
|
||||
class FetchPriceLogsStep(BaseContextStep):
|
||||
"""Fetch price log data from Kafka topic"""
|
||||
|
||||
def transform(self, X=None):
|
||||
return self.context.provider.fetch_kafka_topic('price-logs')
|
||||
|
||||
|
||||
class FetchExperimentsStep(BaseContextStep):
|
||||
"""Fetch experiment metadata for given interaction data"""
|
||||
|
||||
def transform(self, interactions_df: pd.DataFrame):
|
||||
if interactions_df.empty or 'experimentId' not in interactions_df.columns:
|
||||
return pd.DataFrame()
|
||||
|
||||
exp_ids = interactions_df['experimentId'].dropna().unique().tolist()
|
||||
if not exp_ids:
|
||||
return pd.DataFrame()
|
||||
|
||||
return self.context.provider.fetch_experiments(exp_ids)
|
||||
Reference in New Issue
Block a user