From 4426b0ff74d18ac0467bf2fc1626caeac3fa1772 Mon Sep 17 00:00:00 2001 From: Daniel Rosel Date: Sat, 6 Dec 2025 19:06:40 +0100 Subject: [PATCH] chore: update provider and pricing snitch with agnostic system --- experiments/procesing/pipelines.py | 2 +- experiments/procesing/providers/supabase.py | 15 +++++++++++---- experiments/procesing/steps/fetch.py | 12 ++++++++++-- 3 files changed, 22 insertions(+), 7 deletions(-) diff --git a/experiments/procesing/pipelines.py b/experiments/procesing/pipelines.py index 2742254..6837be9 100644 --- a/experiments/procesing/pipelines.py +++ b/experiments/procesing/pipelines.py @@ -131,7 +131,7 @@ if __name__ == '__main__': # example run context = PipelineContext( provider=HistoricalProvider(), - store_mode='hotel', + store_mode='airline', ) product_features, prices = full_pipeline(context) diff --git a/experiments/procesing/providers/supabase.py b/experiments/procesing/providers/supabase.py index 8da01f9..f7bc7b5 100755 --- a/experiments/procesing/providers/supabase.py +++ b/experiments/procesing/providers/supabase.py @@ -18,10 +18,17 @@ class SupabaseProvider(DataProvider): self.supabase: Client = create_client(self.supabase_url, self.supabase_key) def fetch_products(self, store_mode: str) -> pd.DataFrame: - resp = self.supabase.table(f'{store_mode}_products').select( - "id, room_type, date_index, metadata, availability" - ).execute() - return pd.DataFrame(resp.data) if resp.data else pd.DataFrame() + # hotel uses room_type, airline uses flight_type; select all and normalize + resp = self.supabase.table(f'{store_mode}_products').select("*").execute() + if not resp.data: + return pd.DataFrame() + df = pd.DataFrame(resp.data) + # normalize type column: hotel has room_type, airline has flight_type + if 'room_type' in df.columns: + df['product_type'] = df['room_type'] + elif 'flight_type' in df.columns: + df['product_type'] = df['flight_type'] + return df def fetch_experiments(self, experiment_ids: List[str]) -> pd.DataFrame: if not experiment_ids: diff --git a/experiments/procesing/steps/fetch.py b/experiments/procesing/steps/fetch.py index 87c306e..9457e2b 100755 --- a/experiments/procesing/steps/fetch.py +++ b/experiments/procesing/steps/fetch.py @@ -2,7 +2,7 @@ import pandas as pd from procesing.steps.base import BaseContextStep class FetchInteractionsStep(BaseContextStep): - """Fetch raw interaction data from Kafka topic with optional time filtering""" + """Fetch raw interaction data from Kafka topic with optional time and store_mode filtering""" def __init__(self, context, lookback: str = None): super().__init__(context) @@ -24,6 +24,10 @@ class FetchInteractionsStep(BaseContextStep): # drop all where page has /admin/ df = df[~df['page'].str.contains('/admin/', na=False)] + # filter by store_mode from context + if 'storeMode' in df.columns: + df = df[df['storeMode'] == self.context.store_mode] + # Remap dateIndex if present if 'metadata_dateIndex' in df.columns: df['dateIndex'] = df['metadata_dateIndex'].astype('Int64') @@ -38,7 +42,7 @@ class FetchInteractionsStep(BaseContextStep): class FetchPriceLogsStep(BaseContextStep): - """Fetch price log data from Kafka topic with optional time filtering""" + """Fetch price log data from Kafka topic with optional time and store_mode filtering""" def __init__(self, context, lookback: str = None): super().__init__(context) @@ -50,6 +54,10 @@ class FetchPriceLogsStep(BaseContextStep): if df.empty: return df + # filter by store_mode from context + if 'storeMode' in df.columns: + df = df[df['storeMode'] == self.context.store_mode] + # Apply time filtering if lookback specified if self.lookback and 'ts' in df.columns: df['ts'] = pd.to_datetime(df['ts'])