First pricing implementation (#27)

* first implementation of elasticity demand computation * chor: fixing test :( * feature: rudemantary defintition of pricing pipeline * chor: fixing cross product missing data * add warning * feature: e2e pricing pipeline with inference
2026-05-31 16:43:36 +00:00 · 2025-11-27 18:25:27 +01:00
parent 8b76d24ade
commit c432c45343
8 changed files with 829 additions and 39 deletions
--- a/experiments/procesing/pipeline.py
+++ b/experiments/procesing/pipeline.py
@@ -1,22 +1,90 @@
 from sklearn.pipeline import Pipeline
 from sklearn.preprocessing import StandardScaler
+import pandas as pd
+import logging
+log = logging.getLogger(__name__)

-from extract import KafkaDataFetcher, ExperimentJoiner, EventTitleAugmenter
+from extract import KafkaDataFetcher, ExperimentJoiner, EventTitleAugmenter, chunk_shared_data
 from mapping import SessionTransitionProbMatrixTransformer, render_graph
-from demand import DemandEstimator
+from demand import DemandEstimator, ChunkInteractionsIntoSteps
+from elasticity import TemporalElasticityEstimator, aggregate_price_logs
+
+
+
+# elasticity pipeline components (not sklearn compatible, manual orchestration)
+def elasticity_pipeline(interactions_df, price_logs_df, window_size='30s', store_mode='hotel'):
+    """
+    Compute price elasticity from interaction and price data.
+
+    Args:
+        interactions_df: raw interaction data from demand_data_pipeline
+        price_logs_df: price log data from price_data_pipeline
+        window_size: time window for chunking
+        store_mode: 'hotel' or 'airline'
+
+    Returns:
+        df with [productId, elasticity, std_error, n_obs]
+    """
+    # step 1: chunk interactions into time windows
+    chunker = ChunkInteractionsIntoSteps(window_size=window_size, return_metadata=True)
+    interaction_chunks = chunker.transform(interactions_df)
+    log.info(f"Chunked interactions into {len(interaction_chunks)} windows of size {window_size}")
+
+    if not interaction_chunks:
+        return None
+
+    # step 2: compute demand per window
+    demand_estimator = DemandEstimator(store_mode=store_mode)
+    demand_chunks = []
+    for chunk in interaction_chunks:
+        demand_vector = demand_estimator.transform(chunk['data'])
+        demand_chunks.append({
+            'window_start': chunk['window_start'],
+            'window_end': chunk['window_end'],
+            'demand_vector': demand_vector # each has a full list of all products, even if demand is 0
+        })
+    # [q_chunk1, q_chunk2, ...]
+
+    # step 3: aggregate price logs into windows
+    price_chunks = aggregate_price_logs(price_logs_df, window_size=window_size)
+
+    # step 4: compute elasticity
+    elasticity_estimator = TemporalElasticityEstimator(method='point', min_observations=2)
+    elasticity_df = elasticity_estimator.transform(demand_chunks, price_chunks, store_mode=store_mode)
+
+    return elasticity_df


 # exposable pipelines
-etl_pipeline = Pipeline([
-    ('kafka_fetch', KafkaDataFetcher()),
+interaction_pipeline = Pipeline([
+    ('kafka_fetch', KafkaDataFetcher(topic='user-interactions')),
    ('experiment_join', ExperimentJoiner()),
    ('event_augment', EventTitleAugmenter()),
 ])
+
+price_data_pipeline = Pipeline([
+    ('kafka_fetch', KafkaDataFetcher(topic='price-logs')),
+])
+
+# interaction_data + price_data -> elasticity (demand)
+# elasticity -> pricing
+
 pricing_pipeline = Pipeline([
    ('demand_estimation', DemandEstimator()),
 ])
-
 if __name__ == "__main__":
-    processed_data = etl_pipeline.fit_transform(None)
-    pricing = pricing_pipeline.fit_transform(processed_data)
-    print(pricing)
+    # fetch both datasets
+    interaction_data = interaction_pipeline.fit_transform(None)
+    pricing_data = price_data_pipeline.fit_transform(None)
+    if interaction_data.empty or pricing_data.empty:
+        print("Insufficient data for elasticity computation"); exit(0)
+    # compute elasticity via unified pipeline
+    window_size = "30s"
+    elasticity_results = elasticity_pipeline(interaction_data, pricing_data, window_size=window_size)
+    elasticity_value_array = elasticity_results['elasticity'].values if elasticity_results is not None else np.array([])
+    print(elasticity_value_array)
+
+    if elasticity_results is not None and not elasticity_results.empty:
+        print(elasticity_results.to_string(index=False))
+    else:
+        print("\nInsufficient data for elasticity computation")