diff --git a/experiments/procesing/elasticity.py b/experiments/procesing/elasticity.py index 7143e26..5e8a7fe 100644 --- a/experiments/procesing/elasticity.py +++ b/experiments/procesing/elasticity.py @@ -52,6 +52,13 @@ class TemporalElasticityEstimator(BaseEstimator, TransformerMixin): elasticities = [] for pid, series in product_series.items(): if len(series) < self.min_observations: + # assign 0 elasticity for products with insufficient data + elasticities.append({ + 'productId': pid, + 'elasticity': 0.0, + 'std_error': 0.0, + 'n_obs': len(series) + }) continue # apply smoothing if requested @@ -59,13 +66,12 @@ class TemporalElasticityEstimator(BaseEstimator, TransformerMixin): series = self._smooth_series(series, self.smooth_window) elast = self._compute_elasticity(series) - if elast is not None: - elasticities.append({ - 'productId': pid, - 'elasticity': elast['value'], - 'std_error': elast.get('std_error', np.nan), - 'n_obs': len(series) - }) + elasticities.append({ + 'productId': pid, + 'elasticity': elast['value'], + 'std_error': elast.get('std_error', 0.0), + 'n_obs': len(series) + }) return pd.DataFrame(elasticities) @@ -127,7 +133,7 @@ class TemporalElasticityEstimator(BaseEstimator, TransformerMixin): def _compute_elasticity(self, series): """Compute elasticity from time series.""" if len(series) < 2: - return None + return {'value': 0.0, 'std_error': 0.0} prices = np.array([s['price'] for s in series]) quantities = np.array([s['quantity'] for s in series]) @@ -135,7 +141,7 @@ class TemporalElasticityEstimator(BaseEstimator, TransformerMixin): # filter out zero/negative values valid = (prices > 0) & (quantities > 0) if valid.sum() < 2: - return None + return {'value': 0.0, 'std_error': 0.0} prices = prices[valid] quantities = quantities[valid] @@ -153,23 +159,26 @@ class TemporalElasticityEstimator(BaseEstimator, TransformerMixin): log(Q) = a + b*log(P), elasticity = b """ if len(prices) < 2: - return None + return {'value': 0.0, 'std_error': 0.0} log_p = np.log(prices) log_q = np.log(quantities) # simple linear regression if log_p.std() == 0: - return None + return {'value': 0.0, 'std_error': 0.0} cov = np.cov(log_p, log_q)[0, 1] var = np.var(log_p) b = cov / var - # std error estimate - residuals = log_q - (log_q.mean() + b * (log_p - log_p.mean())) - mse = (residuals ** 2).sum() / (len(prices) - 2) - se_b = np.sqrt(mse / (len(prices) * var)) + # std error estimate (avoid div by zero) + if len(prices) <= 2: + se_b = 0.0 + else: + residuals = log_q - (log_q.mean() + b * (log_p - log_p.mean())) + mse = (residuals ** 2).sum() / (len(prices) - 2) + se_b = np.sqrt(mse / (len(prices) * var)) return {'value': b, 'std_error': se_b} diff --git a/experiments/procesing/pricing.py b/experiments/procesing/pricing.py index cd42263..597fd4e 100644 --- a/experiments/procesing/pricing.py +++ b/experiments/procesing/pricing.py @@ -88,17 +88,28 @@ if __name__ == "__main__": interaction_data = interaction_pipeline.fit_transform(None) price_data = price_data_pipeline.fit_transform(None) - price_elasticity = elasticity_pipeline(interaction_data, price_data, window_size="30s") - price_elasticity = price_elasticity['elasticity'].values if price_elasticity is not None and not price_elasticity.empty else np.array([]) + elasticity_df = elasticity_pipeline(interaction_data, price_data, window_size="30s") - price_data = price_data['price'].values if not price_data.empty else np.array([]) + # align elasticity with price data by productId, fill missing with 0 + if not price_data.empty and elasticity_df is not None and not elasticity_df.empty: + price_data_merged = price_data.merge( + elasticity_df[['productId', 'elasticity']], + on='productId', + how='left' + ).fillna({'elasticity': 0.0}) - print(price_elasticity) - print(price_data) + prices = price_data_merged['price'].values + elasticities = price_data_merged['elasticity'].values + else: + prices = np.array([]) + elasticities = np.array([]) + + print(elasticities) + print(prices) state_space = StateSpace( - demand=price_elasticity, - prices=price_data, + demand=elasticities, + prices=prices, session_features=interaction_data )