mirror of
https://github.com/velocitatem/PHANTOM.git
synced 2026-05-31 08:33:36 +00:00
First pricing implementation (#27)
* first implementation of elasticity demand computation * chore: fixing test :( * feature: rudimentary definition of pricing pipeline * chore: fixing cross product missing data * add warning * feature: e2e pricing pipeline with inference
This commit is contained in:
committed by
GitHub
parent
8b76d24ade
commit
c432c45343
@@ -2,14 +2,81 @@ from sklearn.base import BaseEstimator, TransformerMixin
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from supabase import create_client, Client
|
||||
import pandas as pd
|
||||
from typing import Optional, Literal
|
||||
import os
|
||||
import logging
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
SUPABASE_URL = os.getenv("NEXT_PUBLIC_SUPABASE_URL")
|
||||
SUPABASE_KEY = os.getenv("NEXT_PUBLIC_SUPABASE_ANON_KEY")
|
||||
SUPABASE_URL = os.getenv("NEXT_PUBLIC_SUPABASE_URL", "")
|
||||
SUPABASE_KEY = os.getenv("NEXT_PUBLIC_SUPABASE_ANON_KEY", "")
|
||||
|
||||
supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)
|
||||
|
||||
class ChunkInteractionsIntoSteps(BaseEstimator, TransformerMixin):
    """Split interaction data into time windows for temporal analysis.

    ``transform`` yields one entry per *non-empty* window, ordered by window
    start. With ``return_metadata=True`` (the default) each entry is a dict
    describing the window; with ``return_metadata=False`` each entry is just
    the dataframe of rows that fell into the window.
    """

    def __init__(self,
                 window_size: str = '1h',
                 ts_col: str = 'ts',
                 return_metadata: bool = True):
        """
        Args:
            window_size: fixed pandas frequency string ('1h', '30min', '1D',
                etc); it is used both to floor timestamps and to compute
                each window's end.
            ts_col: timestamp column name
            return_metadata: if True, return dict with metadata per chunk
        """
        # scikit-learn convention: __init__ only stores the parameters.
        self.window_size = window_size
        self.ts_col = ts_col
        self.return_metadata = return_metadata

    def fit(self, X, y=None):
        """No-op fit; the transformer is stateless.

        ``y`` is accepted (and ignored) so the signature matches the
        scikit-learn estimator convention ``fit(X, y=None)``.
        """
        return self

    def transform(self, interactions: pd.DataFrame) -> list:
        """Chunk ``interactions`` into consecutive time windows.

        The input dataframe is never modified. Rows are sorted by
        ``ts_col`` and bucketed by flooring each timestamp to
        ``window_size``. Rows whose timestamp is missing (NaT) belong to no
        window and are left out, because groupby drops missing keys by
        default.

        Returns:
            An empty list when ``interactions`` is empty. Otherwise:
            if return_metadata=False: list of dataframes, one per window
            if return_metadata=True: list of dicts with keys:
                - 'data': dataframe for this window
                - 'window_start': start timestamp
                - 'window_end': end timestamp (start + window_size)
                - 'window_idx': integer index, counting only windows that
                  contain at least one row
        """
        # Local import keeps this block self-contained.
        from pandas.tseries.frequencies import to_offset

        if interactions.empty:
            return []

        frame = interactions
        # ensure timestamp is datetime; copy first so the caller's frame is
        # left untouched by the column assignment
        if not pd.api.types.is_datetime64_any_dtype(frame[self.ts_col]):
            frame = frame.copy()
            frame[self.ts_col] = pd.to_datetime(frame[self.ts_col])

        # sort by time; a stable sort keeps the input order of rows that
        # share a timestamp, making the output deterministic
        frame = frame.sort_values(self.ts_col, kind='stable')

        # Parse the frequency once. An offset (unlike pd.Timedelta) also
        # understands frequency strings without a leading number, e.g. 'h'.
        window_span = to_offset(self.window_size)

        # Group on a standalone key instead of a temporary '_window' column,
        # so an input column that happens to be called '_window' survives.
        window_key = frame[self.ts_col].dt.floor(window_span)

        chunks = []
        for idx, (window_start, chunk_data) in enumerate(frame.groupby(window_key)):
            if self.return_metadata:
                chunks.append({
                    'data': chunk_data,
                    'window_start': window_start,
                    'window_end': window_start + window_span,
                    'window_idx': idx,
                })
            else:
                chunks.append(chunk_data)

        return chunks
|
||||
|
||||
|
||||
class DemandEstimator(BaseEstimator, TransformerMixin):
|
||||
def __init__(self,
|
||||
store_mode:str='hotel',
|
||||
@@ -28,12 +95,25 @@ class DemandEstimator(BaseEstimator, TransformerMixin):
|
||||
interactions = interactions[interactions['sessionId'] == self.session_filter]
|
||||
if self.experiment_filter:
|
||||
interactions = interactions[interactions['experimentId'] == self.experiment_filter]
|
||||
|
||||
products=supabase.table(f'{self.store}_products').select("id, room_type, date_index, metadata, availability").execute()
|
||||
products = pd.DataFrame(products.data)
|
||||
unique_products = products['id'].unique()
|
||||
log.info(f"Demand estimator found {len(unique_products)} in data")
|
||||
|
||||
# filter out rows without productId
|
||||
interactions_with_products = interactions.dropna(subset=['productId'])
|
||||
|
||||
if interactions_with_products.empty:
|
||||
# no interactions with products, return all zeros
|
||||
return pd.DataFrame({
|
||||
'productId': unique_products,
|
||||
'demand_score': 0
|
||||
})
|
||||
|
||||
# TODO: improve demand score calculation rather than just counting interactions (use weights..)
|
||||
# while maintaining simplicity of a simple cross tab approach
|
||||
product_demand = pd.crosstab(interactions['productId'], "no_of_interactions")
|
||||
product_demand = pd.crosstab(interactions_with_products['productId'], "no_of_interactions")
|
||||
product_demand = product_demand.reindex(unique_products, fill_value=0).reset_index()
|
||||
product_demand.columns = ['productId', 'demand_score']
|
||||
return product_demand
|
||||
|
||||
Reference in New Issue
Block a user