mirror of
https://github.com/velocitatem/PHANTOM.git
synced 2026-05-31 16:43:36 +00:00
* first implementation of elasticity demand computation * chore: fixing test :( * feature: rudimentary definition of pricing pipeline * chore: fixing cross product missing data * add warning * feature: e2e pricing pipeline with inference
120 lines
4.4 KiB
Python
120 lines
4.4 KiB
Python
from sklearn.base import BaseEstimator, TransformerMixin
|
|
import numpy as np
|
|
import pandas as pd
|
|
from supabase import create_client, Client
|
|
from typing import Optional, Literal
|
|
import os
|
|
import logging
|
|
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Supabase connection settings are read from the environment; each one falls
# back to an empty string when its variable is not set.
SUPABASE_URL = os.environ.get("NEXT_PUBLIC_SUPABASE_URL", "")
SUPABASE_KEY = os.environ.get("NEXT_PUBLIC_SUPABASE_ANON_KEY", "")

# Shared client, built once when this module is imported.
supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)
|
|
|
|
class ChunkInteractionsIntoSteps(BaseEstimator, TransformerMixin):
    """
    Split interaction data into time windows for temporal analysis.

    Every row is assigned to the window obtained by flooring its timestamp
    to ``window_size``. Only windows that contain at least one row are
    emitted, in chronological order.
    """

    def __init__(self,
                 window_size: str = '1h',
                 ts_col: str = 'ts',
                 return_metadata: bool = True):
        """
        Args:
            window_size: pandas frequency string that is accepted both by
                ``Series.dt.floor`` and by ``pd.Timedelta``
                (e.g. '1h', '30min', '1D')
            ts_col: timestamp column name
            return_metadata: if True, return a dict with metadata per chunk
                instead of the bare dataframe
        """
        self.window_size = window_size
        self.ts_col = ts_col
        self.return_metadata = return_metadata

    def fit(self, X, y=None):
        """
        No-op: this transformer is stateless.

        ``y`` is accepted (and ignored) so the transformer can be called as
        ``fit(X, y)`` / ``fit_transform(X, y)``, e.g. from inside a pipeline
        that carries a target.
        """
        return self

    def transform(self, interactions: pd.DataFrame):
        """
        Chunk ``interactions`` by time window.

        The input frame is not modified; all work happens on a copy.

        Returns:
            An empty list if ``interactions`` is empty. Otherwise:

            if return_metadata=False: list of dataframes, one per window
            if return_metadata=True: list of dicts with keys:
                - 'data': dataframe for this window
                - 'window_start': start timestamp
                - 'window_end': end timestamp (window_start + window_size)
                - 'window_idx': integer index
        """
        if interactions.empty:
            return []

        df = interactions.copy()

        # ensure timestamp is datetime
        if not pd.api.types.is_datetime64_any_dtype(df[self.ts_col]):
            df[self.ts_col] = pd.to_datetime(df[self.ts_col])

        # sort by time so rows inside each chunk are chronological
        df = df.sort_values(self.ts_col)

        # assign window
        # NOTE(review): rows whose timestamp is missing (NaT) get a NaT window
        # key and are presumably dropped by groupby's default NaN-key handling
        # - confirm this is the intended behaviour.
        df['_window'] = df[self.ts_col].dt.floor(self.window_size)

        # The window length is the same for every chunk, so parse it once
        # instead of once per window. Only needed for the metadata variant.
        window_length = pd.Timedelta(self.window_size) if self.return_metadata else None

        # group by window
        chunks = []
        for idx, (window_start, group) in enumerate(df.groupby('_window')):
            chunk_data = group.drop(columns=['_window'])

            if self.return_metadata:
                chunks.append({
                    'data': chunk_data,
                    'window_start': window_start,
                    'window_end': window_start + window_length,
                    'window_idx': idx,
                })
            else:
                chunks.append(chunk_data)

        return chunks
|
|
|
|
|
|
class DemandEstimator(BaseEstimator, TransformerMixin):
    """
    Compute a per-product demand score from raw interaction rows.

    The score is currently the number of interaction rows that reference a
    product. Products listed in the ``{store_mode}_products`` table that have
    no interactions receive a score of 0.
    """

    def __init__(self,
                 store_mode: str = 'hotel',
                 session_filter: str = "",
                 experiment_filter: str = ""):
        """
        Args:
            store_mode: prefix of the products table to query
                (``f'{store_mode}_products'``)
            session_filter: if non-empty, only rows whose 'sessionId' equals
                this value are counted
            experiment_filter: if non-empty, only rows whose 'experimentId'
                equals this value are counted
        """
        # Constructor arguments are stored unmodified under their own names:
        # BaseEstimator.get_params() / clone() / repr() look them up by the
        # parameter name and expect the value that was passed in. Empty (or
        # None) filters are simply treated as "no filter" in transform().
        self.store_mode = store_mode
        self.session_filter = session_filter
        self.experiment_filter = experiment_filter

    @property
    def store(self):
        """Backward-compatible alias for ``store_mode``."""
        return self.store_mode

    @store.setter
    def store(self, value):
        self.store_mode = value

    def fit(self, X, y=None):
        """
        No-op: this transformer is stateless.

        ``y`` is accepted (and ignored) so the transformer can be called as
        ``fit(X, y)`` / ``fit_transform(X, y)``.
        """
        return self

    def transform(self, interactions: pd.DataFrame):
        """
        Count interactions per product.

        Args:
            interactions: frame with a 'productId' column; 'sessionId' and
                'experimentId' columns are required when the corresponding
                filter is set.

        Returns:
            DataFrame with columns 'productId' and 'demand_score'. It has no
            rows when ``interactions`` is empty or when the products table
            yields no 'id' column; otherwise it has one row per unique
            product id returned by the products table.
        """
        if interactions.empty:
            return pd.DataFrame(columns=["productId", "demand_score"])

        if self.session_filter:
            interactions = interactions[interactions['sessionId'] == self.session_filter]
        if self.experiment_filter:
            interactions = interactions[interactions['experimentId'] == self.experiment_filter]

        # NOTE(review): only 'id' is used below; the other selected columns
        # look hotel-specific - confirm they exist for every store_mode.
        response = supabase.table(f'{self.store_mode}_products').select("id, room_type, date_index, metadata, availability").execute()
        products = pd.DataFrame(response.data)

        # An empty result set produces a frame without any columns, so there
        # is no 'id' to index; report no demand rows instead of a KeyError.
        if 'id' not in products.columns:
            log.warning("Demand estimator found no products for store '%s'", self.store_mode)
            return pd.DataFrame(columns=["productId", "demand_score"])

        unique_products = products['id'].unique()
        log.info("Demand estimator found %d products in data", len(unique_products))

        # filter out rows without productId
        interactions_with_products = interactions.dropna(subset=['productId'])

        if interactions_with_products.empty:
            # no interactions with products, return all zeros
            return pd.DataFrame({
                'productId': unique_products,
                'demand_score': 0,
            })

        # TODO: improve demand score calculation rather than just counting interactions (use weights..)
        # while maintaining simplicity of a simple cross tab approach
        product_demand = pd.crosstab(interactions_with_products['productId'], "no_of_interactions")
        # Align to the product catalogue: products never interacted with get 0,
        # and interaction ids that are not in the catalogue are discarded.
        product_demand = product_demand.reindex(unique_products, fill_value=0).reset_index()
        product_demand.columns = ['productId', 'demand_score']
        return product_demand
|