import os
from typing import Dict, List, Optional

import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin
from supabase import Client, create_client

SUPABASE_URL = os.getenv("NEXT_PUBLIC_SUPABASE_URL", "")
SUPABASE_KEY = os.getenv("NEXT_PUBLIC_SUPABASE_ANON_KEY", "")
# The client is created at import time; the product-catalogue queries in
# `TemporalElasticityEstimator.transform` and `aggregate_price_logs` use it.
supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)

# Column order of the DataFrame returned by `TemporalElasticityEstimator.transform`.
_RESULT_COLUMNS = ['productId', 'elasticity', 'std_error', 'n_obs']


def _zero_elasticity_row(pid, n_obs: int) -> Dict:
    """Build the result row used for products whose elasticity is reported as 0."""
    return {'productId': pid, 'elasticity': 0.0, 'std_error': 0.0, 'n_obs': n_obs}


class TemporalElasticityEstimator(BaseEstimator, TransformerMixin):
    """
    Compute price elasticity from time-series demand and price data.

    Elasticity = (% change in quantity) / (% change in price)

    Works with chunked time-window data from ChunkInteractionsIntoSteps.
    """

    def __init__(self, method: str = 'point', min_observations: int = 2,
                 smooth_window: Optional[int] = None):
        """
        Args:
            method: 'point' (log-log regression slope) or 'arc' (mean of
                period-over-period arc elasticities)
            min_observations: min data points needed per product; products
                with fewer points are reported with zero elasticity
            smooth_window: if set, apply centered rolling-average smoothing
                of this width to each product's time series
        """
        self.method = method
        self.min_observations = min_observations
        self.smooth_window = smooth_window

    def fit(self, X, y=None):
        """No-op fit; the estimator has no learned state. Returns self."""
        return self

    def transform(self, demand_chunks: List[Dict], price_chunks: List[Dict],
                  store_mode: str = 'hotel') -> pd.DataFrame:
        """
        Estimate one elasticity per product.

        Args:
            demand_chunks: list from ChunkInteractionsIntoSteps + DemandEstimator,
                each item: {'window_start', 'window_end', 'demand_vector'} where
                demand_vector is a df with [productId, demand_score]
            price_chunks: list of dicts with
                {'window_start', 'window_end', 'price_vector'} where
                price_vector is a df with [productId, price]
            store_mode: prefix of the '<store_mode>_products' table from which
                the full list of product ids is fetched

        Returns:
            df with [productId, elasticity, std_error, n_obs]. Every product id
            returned by the table query appears; products with no aligned data
            or too few observations get elasticity 0.0.

        Raises:
            ValueError: if `self.method` is neither 'point' nor 'arc' and at
                least one product has enough valid observations to be estimated.
        """
        # fetch all products from database
        response = supabase.table(f'{store_mode}_products').select("id").execute()
        all_product_ids = [p['id'] for p in response.data]

        aligned = self._align_chunks(demand_chunks, price_chunks)
        product_series = self._build_product_timeseries(aligned) if aligned else {}

        rows = []
        for pid, series in product_series.items():
            if len(series) < self.min_observations:
                # insufficient data: report zero elasticity, keep the count
                rows.append(_zero_elasticity_row(pid, len(series)))
                continue

            # apply smoothing if requested (only when the series can fill a window)
            if self.smooth_window and len(series) >= self.smooth_window:
                series = self._smooth_series(series, self.smooth_window)

            elast = self._compute_elasticity(series)
            rows.append({
                'productId': pid,
                'elasticity': elast['value'],
                'std_error': elast.get('std_error', 0.0),
                # counted after smoothing, which drops the window edges
                'n_obs': len(series),
            })

        # fill in catalogue products that never showed up in the aligned data
        observed_pids = {row['productId'] for row in rows}
        rows.extend(
            _zero_elasticity_row(pid, 0)
            for pid in all_product_ids
            if pid not in observed_pids
        )

        # Explicit columns keep the schema stable even when `rows` is empty.
        return pd.DataFrame(rows, columns=_RESULT_COLUMNS)

    def _align_chunks(self, demand_chunks, price_chunks):
        """
        Align demand and price data by matching time windows.

        Chunks are matched on equal 'window_start'; demand chunks without a
        matching price chunk are dropped. If several price chunks share a
        'window_start', the last one wins.
        """
        price_lookup = {chunk['window_start']: chunk for chunk in price_chunks}

        return [
            {
                'window_start': demand_chunk['window_start'],
                'window_end': demand_chunk['window_end'],
                'demand': demand_chunk['demand_vector'],
                'prices': price_lookup[demand_chunk['window_start']]['price_vector'],
            }
            for demand_chunk in demand_chunks
            if demand_chunk['window_start'] in price_lookup
        ]

    def _build_product_timeseries(self, aligned_chunks):
        """
        Build a time series of {'timestamp', 'price', 'quantity'} per product.

        Returns a dict mapping productId to a list of observations in the
        order the aligned chunks were given.
        """
        series_by_product = {}

        for chunk in aligned_chunks:
            # inner merge: only products present in both demand and price data
            merged = chunk['demand'].merge(chunk['prices'], on='productId', how='inner')

            # Iterate columns rather than rows: iterrows() builds one Series
            # per row and upcasts mixed dtypes (e.g. int ids become floats).
            for pid, price, quantity in zip(
                merged['productId'], merged['price'], merged['demand_score']
            ):
                series_by_product.setdefault(pid, []).append({
                    'timestamp': chunk['window_start'],
                    'price': price,
                    'quantity': quantity,
                })

        return series_by_product

    def _smooth_series(self, series, window):
        """
        Apply centered rolling-average smoothing to price and quantity.

        Rows for which the rolling window is incomplete (the edges) are
        dropped, so the returned series is shorter than the input.
        """
        df = pd.DataFrame(series)
        df['price_smooth'] = df['price'].rolling(window=window, center=True).mean()
        df['quantity_smooth'] = df['quantity'].rolling(window=window, center=True).mean()
        df = df.dropna()

        return [
            {'timestamp': ts, 'price': price, 'quantity': quantity}
            for ts, price, quantity in zip(
                df['timestamp'], df['price_smooth'], df['quantity_smooth']
            )
        ]

    def _compute_elasticity(self, series):
        """
        Compute elasticity from one product's time series.

        Returns a dict {'value', 'std_error'}; both are 0.0 when fewer than
        two observations have strictly positive price and quantity.

        Raises:
            ValueError: if `self.method` is not 'point' or 'arc'.
        """
        if len(series) < 2:
            return {'value': 0.0, 'std_error': 0.0}

        # dtype=float so missing values become NaN (and are filtered below)
        # instead of producing object arrays that break comparisons and log().
        prices = np.array([s['price'] for s in series], dtype=float)
        quantities = np.array([s['quantity'] for s in series], dtype=float)

        # keep strictly positive pairs only (NaN compares False and is dropped)
        valid = (prices > 0) & (quantities > 0)
        if valid.sum() < 2:
            return {'value': 0.0, 'std_error': 0.0}

        prices = prices[valid]
        quantities = quantities[valid]

        if self.method == 'point':
            return self._point_elasticity(prices, quantities)
        elif self.method == 'arc':
            return self._arc_elasticity(prices, quantities)
        else:
            raise ValueError(f"Unknown method: {self.method}")

    def _point_elasticity(self, prices, quantities):
        """
        Point elasticity using log-log regression.

        log(Q) = a + b*log(P), elasticity = b

        Returns {'value': b, 'std_error': se(b)}; the standard error is 0.0
        with two or fewer points (no residual degrees of freedom), and both
        values are 0.0 when the price never varies.
        """
        n = len(prices)
        if n < 2:
            return {'value': 0.0, 'std_error': 0.0}

        log_p = np.log(np.asarray(prices, dtype=float))
        log_q = np.log(np.asarray(quantities, dtype=float))

        dp = log_p - log_p.mean()
        dq = log_q - log_q.mean()
        sxx = float((dp ** 2).sum())

        # no price variation: the slope is undefined, report zero
        if np.ptp(log_p) == 0 or sxx == 0:
            return {'value': 0.0, 'std_error': 0.0}

        # OLS slope Sxy / Sxx. Using the raw sums keeps numerator and
        # denominator on the same normalisation (np.cov defaults to ddof=1
        # while np.var defaults to ddof=0, which would inflate b by n/(n-1)).
        b = float((dp * dq).sum() / sxx)

        # std error estimate (needs at least one residual degree of freedom)
        if n <= 2:
            se_b = 0.0
        else:
            residuals = dq - b * dp
            mse = (residuals ** 2).sum() / (n - 2)
            se_b = float(np.sqrt(mse / sxx))

        return {'value': b, 'std_error': se_b}

    def _arc_elasticity(self, prices, quantities):
        """
        Arc elasticity: average of period-over-period elasticities.

        E_t = (ΔQ/Q_avg) / (ΔP/P_avg)

        Periods with no price change (or a zero midpoint) are skipped.
        Returns {'value', 'std_error'}; both are 0.0 when no period is usable,
        so callers can always index the result.
        """
        prices = np.asarray(prices, dtype=float)
        quantities = np.asarray(quantities, dtype=float)

        p_avg = (prices[:-1] + prices[1:]) / 2
        q_avg = (quantities[:-1] + quantities[1:]) / 2
        delta_p = np.diff(prices)
        delta_q = np.diff(quantities)

        usable = (p_avg != 0) & (q_avg != 0) & (delta_p != 0)
        if not usable.any():
            return {'value': 0.0, 'std_error': 0.0}

        elasticities = (delta_q[usable] / q_avg[usable]) / (delta_p[usable] / p_avg[usable])

        return {
            'value': float(np.mean(elasticities)),
            'std_error': float(np.std(elasticities) / np.sqrt(len(elasticities))),
        }


def aggregate_price_logs(price_logs: pd.DataFrame, window_size: str = '1H',
                         ts_col: str = 'ts', store_mode: str = 'hotel') -> List[Dict]:
    """
    Recover price vectors treating prices as persistent state changes.

    Prices are set-operations that persist until next change.
    For each window:
    - If price logs exist: average all changes within window
    - If no logs: carry forward last price before window end

    Args:
        price_logs: df with [productId, price, ts, ...]
        window_size: time window size matching ChunkInteractionsIntoSteps
        ts_col: timestamp column name
        store_mode: prefix of the '<store_mode>_products' table that lists the
            product ids to report on

    Returns:
        list of dicts with {'window_start', 'window_end', 'price_vector'}
        where price_vector is df with [productId, price]. Windows in which no
        catalogue product has a price yet are omitted. An empty `price_logs`
        returns [] without querying the database.
    """
    if price_logs.empty:
        return []

    df = price_logs.copy()
    if not pd.api.types.is_datetime64_any_dtype(df[ts_col]):
        df[ts_col] = pd.to_datetime(df[ts_col])

    df = df.sort_values([ts_col, 'productId'])

    # Only the ids are used, so only the ids are requested (this also matches
    # the query in TemporalElasticityEstimator.transform).
    response = supabase.table(f'{store_mode}_products').select("id").execute()
    # de-duplicate while keeping the order returned by the query
    catalogue_ids = list(dict.fromkeys(p['id'] for p in response.data))

    # Split the logs per product once instead of re-filtering the whole frame
    # for every (window, product) pair. Rows stay in timestamp order.
    logs_by_product = {pid: grp for pid, grp in df.groupby('productId', sort=False)}

    # generate windows across data range
    min_time, max_time = df[ts_col].min(), df[ts_col].max()
    windows = pd.date_range(
        start=min_time.floor(window_size),
        end=max_time,
        freq=window_size
    )
    window_length = pd.Timedelta(window_size)

    chunks = []

    for window_start in windows:
        window_end = window_start + window_length
        price_vector = []

        # Walk the full catalogue (not just products with history so far);
        # products without any price before window_end are skipped below.
        for pid in catalogue_ids:
            product_data = logs_by_product.get(pid)
            if product_data is None:
                continue

            timestamps = product_data[ts_col]
            before_end = timestamps < window_end
            in_window = product_data.loc[before_end & (timestamps >= window_start), 'price']

            if not in_window.empty:
                # average changes within window
                price = in_window.mean()
            else:
                # carry forward: last price before window end
                earlier = product_data.loc[before_end, 'price']
                if earlier.empty:
                    continue
                price = earlier.iloc[-1]

            price_vector.append({'productId': pid, 'price': price})

        if price_vector:
            chunks.append({
                'window_start': window_start,
                'window_end': window_end,
                'price_vector': pd.DataFrame(price_vector)
            })

    return chunks