first implementation of elasticity demand computation

This commit is contained in:
2025-11-25 22:27:38 +01:00
parent 8b76d24ade
commit c639d99be2
8 changed files with 616 additions and 38 deletions

View File

@@ -2,7 +2,7 @@ from sklearn.base import BaseEstimator, TransformerMixin
import numpy as np
import pandas as pd
from supabase import create_client, Client
import pandas as pd
from typing import Optional, Literal
import os
SUPABASE_URL = os.getenv("NEXT_PUBLIC_SUPABASE_URL")
@@ -10,6 +10,71 @@ SUPABASE_KEY = os.getenv("NEXT_PUBLIC_SUPABASE_ANON_KEY")
supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)
class ChunkInteractionsIntoSteps(BaseEstimator, TransformerMixin):
    """
    Split interaction data into time windows for temporal analysis.

    Every row is assigned to the window obtained by flooring its timestamp
    to ``window_size``. One chunk is produced per window that contains at
    least one row; windows without any rows are not emitted.
    """

    def __init__(self,
                 window_size: str = '1h',
                 ts_col: str = 'ts',
                 return_metadata: bool = True):
        """
        Args:
            window_size: fixed pandas frequency string ('1h', '30min', 'D', ...).
                It must be accepted by ``Series.dt.floor``.
            ts_col: timestamp column name
            return_metadata: if True, return a dict with metadata per chunk
        """
        self.window_size = window_size
        self.ts_col = ts_col
        self.return_metadata = return_metadata

    def fit(self, X, y=None):
        """
        No-op fit; the transformer is stateless.

        ``y`` is accepted and ignored so callers that forward a target
        (the scikit-learn ``fit(X, y)`` convention) do not fail.
        """
        return self

    def transform(self, interactions: pd.DataFrame):
        """
        Chunk ``interactions`` into consecutive time windows.

        Args:
            interactions: dataframe containing the ``ts_col`` column. The
                input is not modified; a copy is made before processing.

        Returns:
            An empty list if ``interactions`` is empty. Otherwise:
            if return_metadata=False: list of dataframes, one per window
            if return_metadata=True: list of dicts with keys:
                - 'data': dataframe for this window
                - 'window_start': start timestamp
                - 'window_end': end timestamp (window_start + window_size)
                - 'window_idx': integer index among the emitted windows
        """
        # Local import keeps this block self-contained.
        from pandas.tseries.frequencies import to_offset

        if interactions.empty:
            return []

        df = interactions.copy()

        # ensure timestamp is datetime
        if not pd.api.types.is_datetime64_any_dtype(df[self.ts_col]):
            df[self.ts_col] = pd.to_datetime(df[self.ts_col])

        # Stable sort: rows sharing a timestamp keep their input order.
        df = df.sort_values(self.ts_col, kind='mergesort')

        # assign each row to the start of its window
        df['_window'] = df[self.ts_col].dt.floor(self.window_size)

        # Parse the window length with the same frequency parser that
        # ``floor`` uses, so unit-only strings such as 'h' or 'D' also work
        # (``pd.Timedelta('h')`` would raise).
        step = to_offset(self.window_size)

        chunks = []
        # groupby yields windows in ascending order and skips rows whose
        # window key is missing (NaT timestamps).
        for idx, (window_start, group) in enumerate(df.groupby('_window')):
            chunk_data = group.drop(columns=['_window'])
            if self.return_metadata:
                chunks.append({
                    'data': chunk_data,
                    'window_start': window_start,
                    'window_end': window_start + step,
                    'window_idx': idx,
                })
            else:
                chunks.append(chunk_data)
        return chunks
class DemandEstimator(BaseEstimator, TransformerMixin):
def __init__(self,
store_mode:str='hotel',
@@ -28,12 +93,24 @@ class DemandEstimator(BaseEstimator, TransformerMixin):
interactions = interactions[interactions['sessionId'] == self.session_filter]
if self.experiment_filter:
interactions = interactions[interactions['experimentId'] == self.experiment_filter]
products=supabase.table(f'{self.store}_products').select("id, room_type, date_index, metadata, availability").execute()
products = pd.DataFrame(products.data)
unique_products = products['id'].unique()
# filter out rows without productId
interactions_with_products = interactions.dropna(subset=['productId'])
if interactions_with_products.empty:
# no interactions with products, return all zeros
return pd.DataFrame({
'productId': unique_products,
'demand_score': 0
})
# TODO: improve the demand score calculation beyond simply counting interactions (e.g. use weights)
# while keeping the simplicity of the crosstab approach
product_demand = pd.crosstab(interactions['productId'], "no_of_interactions")
product_demand = pd.crosstab(interactions_with_products['productId'], "no_of_interactions")
product_demand = product_demand.reindex(unique_products, fill_value=0).reset_index()
product_demand.columns = ['productId', 'demand_score']
return product_demand