chore: fixing cross product missing data

This commit is contained in:
2025-11-27 00:36:25 +01:00
parent 633edcd76b
commit 07262e5c8f
2 changed files with 42 additions and 22 deletions

View File

@@ -52,6 +52,13 @@ class TemporalElasticityEstimator(BaseEstimator, TransformerMixin):
elasticities = [] elasticities = []
for pid, series in product_series.items(): for pid, series in product_series.items():
if len(series) < self.min_observations: if len(series) < self.min_observations:
# assign 0 elasticity for products with insufficient data
elasticities.append({
'productId': pid,
'elasticity': 0.0,
'std_error': 0.0,
'n_obs': len(series)
})
continue continue
# apply smoothing if requested # apply smoothing if requested
@@ -59,13 +66,12 @@ class TemporalElasticityEstimator(BaseEstimator, TransformerMixin):
series = self._smooth_series(series, self.smooth_window) series = self._smooth_series(series, self.smooth_window)
elast = self._compute_elasticity(series) elast = self._compute_elasticity(series)
if elast is not None: elasticities.append({
elasticities.append({ 'productId': pid,
'productId': pid, 'elasticity': elast['value'],
'elasticity': elast['value'], 'std_error': elast.get('std_error', 0.0),
'std_error': elast.get('std_error', np.nan), 'n_obs': len(series)
'n_obs': len(series) })
})
return pd.DataFrame(elasticities) return pd.DataFrame(elasticities)
@@ -127,7 +133,7 @@ class TemporalElasticityEstimator(BaseEstimator, TransformerMixin):
def _compute_elasticity(self, series): def _compute_elasticity(self, series):
"""Compute elasticity from time series.""" """Compute elasticity from time series."""
if len(series) < 2: if len(series) < 2:
return None return {'value': 0.0, 'std_error': 0.0}
prices = np.array([s['price'] for s in series]) prices = np.array([s['price'] for s in series])
quantities = np.array([s['quantity'] for s in series]) quantities = np.array([s['quantity'] for s in series])
@@ -135,7 +141,7 @@ class TemporalElasticityEstimator(BaseEstimator, TransformerMixin):
# filter out zero/negative values # filter out zero/negative values
valid = (prices > 0) & (quantities > 0) valid = (prices > 0) & (quantities > 0)
if valid.sum() < 2: if valid.sum() < 2:
return None return {'value': 0.0, 'std_error': 0.0}
prices = prices[valid] prices = prices[valid]
quantities = quantities[valid] quantities = quantities[valid]
@@ -153,23 +159,26 @@ class TemporalElasticityEstimator(BaseEstimator, TransformerMixin):
log(Q) = a + b*log(P), elasticity = b log(Q) = a + b*log(P), elasticity = b
""" """
if len(prices) < 2: if len(prices) < 2:
return None return {'value': 0.0, 'std_error': 0.0}
log_p = np.log(prices) log_p = np.log(prices)
log_q = np.log(quantities) log_q = np.log(quantities)
# simple linear regression # simple linear regression
if log_p.std() == 0: if log_p.std() == 0:
return None return {'value': 0.0, 'std_error': 0.0}
cov = np.cov(log_p, log_q)[0, 1] cov = np.cov(log_p, log_q)[0, 1]
var = np.var(log_p) var = np.var(log_p)
b = cov / var b = cov / var
# std error estimate # std error estimate (avoid div by zero)
residuals = log_q - (log_q.mean() + b * (log_p - log_p.mean())) if len(prices) <= 2:
mse = (residuals ** 2).sum() / (len(prices) - 2) se_b = 0.0
se_b = np.sqrt(mse / (len(prices) * var)) else:
residuals = log_q - (log_q.mean() + b * (log_p - log_p.mean()))
mse = (residuals ** 2).sum() / (len(prices) - 2)
se_b = np.sqrt(mse / (len(prices) * var))
return {'value': b, 'std_error': se_b} return {'value': b, 'std_error': se_b}

View File

@@ -88,17 +88,28 @@ if __name__ == "__main__":
interaction_data = interaction_pipeline.fit_transform(None) interaction_data = interaction_pipeline.fit_transform(None)
price_data = price_data_pipeline.fit_transform(None) price_data = price_data_pipeline.fit_transform(None)
price_elasticity = elasticity_pipeline(interaction_data, price_data, window_size="30s") elasticity_df = elasticity_pipeline(interaction_data, price_data, window_size="30s")
price_elasticity = price_elasticity['elasticity'].values if price_elasticity is not None and not price_elasticity.empty else np.array([])
price_data = price_data['price'].values if not price_data.empty else np.array([]) # align elasticity with price data by productId, fill missing with 0
if not price_data.empty and elasticity_df is not None and not elasticity_df.empty:
price_data_merged = price_data.merge(
elasticity_df[['productId', 'elasticity']],
on='productId',
how='left'
).fillna({'elasticity': 0.0})
print(price_elasticity) prices = price_data_merged['price'].values
print(price_data) elasticities = price_data_merged['elasticity'].values
else:
prices = np.array([])
elasticities = np.array([])
print(elasticities)
print(prices)
state_space = StateSpace( state_space = StateSpace(
demand=price_elasticity, demand=elasticities,
prices=price_data, prices=prices,
session_features=interaction_data session_features=interaction_data
) )