Improving interface after experiment01 (#30)

* fix: fixes of backwards * fixing hotel information with image placeholders * chore: clean up product display in hotel and cleaner interfacing * adding loader with historical data loading * feature: cleaning up pipeline * chore: simple surge pricer * created new pricing pipeline * adding a checkout page to both sites * fix: fixing stale package * test: we won't be using elasticity anymore so it's okay * chore: cleaning elasticity references * chore: store sting * feature: e2e intro pipeline surge pricing * fix: CVE vulnerability patching
2026-07-16 01:53:37 +00:00 · 2025-12-06 17:47:14 +01:00
parent 59d4fb7891
commit 8751583764
27 changed files with 709 additions and 1096 deletions
--- a/experiments/procesing/pipelines.py
+++ b/experiments/procesing/pipelines.py
@@ -2,7 +2,6 @@ from sklearn.pipeline import Pipeline
 import pandas as pd
 from procesing.context import PipelineContext
 from procesing.providers import SupabaseProvider, BackendAPIProvider
-from typing import Union
 from procesing.steps import (
    FetchInteractionsStep,
    FetchPriceLogsStep,
@@ -13,11 +12,13 @@ from procesing.steps import (
    ChunkByTimeWindowStep,
    ComputeDemandForChunksStep,
    AggregatePriceLogsStep,
-    ComputeElasticityStep,
    # BuildStateSpaceStep,
    FitPricingFunctionStep,
    PredictPricesStep,
+    ComputeDemandStep,
+    JoinProductFeaturesStep
 )
+from procesing.pricers import SimpleSurgePricer

 def interaction_extraction_pipeline(context: PipelineContext):
    """Pipeline for extracting and augmenting interaction data"""
@@ -35,80 +36,76 @@ def price_extraction_pipeline(context: PipelineContext):
    ])


-def elasticity_computation_pipeline(context: PipelineContext,
+def product_features_pipeline(context: PipelineContext,
                                    interactions_df: pd.DataFrame,
                                    price_logs_df: pd.DataFrame):
-    """
-    Compute elasticity from interactions and price logs.
-    Manual orchestration needed for branching logic.
-    """
-    # branch 1: chunk interactions and compute demand
-    chunk_step = ChunkByTimeWindowStep(context)
-    interaction_chunks = chunk_step.transform(interactions_df)
-
-    demand_step = ComputeDemandForChunksStep(context)
-    demand_chunks = demand_step.transform(interaction_chunks)
-
-    # branch 2: aggregate price logs
+    demand_step = ComputeDemandStep(context)
    price_step = AggregatePriceLogsStep(context)
-    price_chunks = price_step.transform(price_logs_df)
-
-    # convergence: compute elasticity
-    elasticity_step = ComputeElasticityStep(context)
-    elasticity_df = elasticity_step.transform((demand_chunks, price_chunks))
-
-    return elasticity_df
+    join_step = JoinProductFeaturesStep(context)


-def pricing_pipeline(context: PipelineContext, elasticity_df: pd.DataFrame):
+    demand_data = demand_step.transform(interactions_df)
+    price_data= price_step.transform(price_logs_df)
+    joined_data = join_step.transform((demand_data, price_data))
+
+    return joined_data
+
+
+
+def pricing_pipeline(context: "PipelineContext",
+                     data: pd.DataFrame,
+                     high_threshold: int = 10,
+                     low_threshold: int = 2,
+                     surge_multiplier: float = 1.2,
+                     discount_multiplier: float = 0.9) -> pd.DataFrame:
+
+    if data.empty or 'productId' not in data.columns:
+        return pd.DataFrame()
+
+    surge_pricer = SimpleSurgePricer()
+    surge_pricer.fit(data)
+    data['optimal_price'] = surge_pricer.predict()
+    return data
+
+
+def full_pipeline(context: PipelineContext,
+                  high_threshold: int = 10,
+                  low_threshold: int = 2,
+                  surge_multiplier: float = 1.2,
+                  discount_multiplier: float = 0.9):
    """
-    Generate optimal prices from elasticity estimates.
+    Complete end-to-end pipeline: data extraction -> demand/price aggregation -> surge pricing
+
+    Args:
+        context: Pipeline context
+        high_threshold: Demand threshold for surge pricing
+        low_threshold: Demand threshold for discounts
+        surge_multiplier: Price multiplier for high demand
+        discount_multiplier: Price multiplier for low demand
+
+    Returns:
+        tuple: (product_features_df, optimal_prices_df)
+            - product_features_df: [productId, demand_score, price]
+            - optimal_prices_df: [productId, current_price, optimal_price, demand_score]
    """
-    # build state space
-    state_step = BuildStateSpaceStep(context)
-    state_space = state_step.transform(elasticity_df)
-
-    # fit pricing function
-    fit_step = FitPricingFunctionStep(context)
-    pricer = fit_step.transform(elasticity_df)
-
-    # predict prices
-    predict_step = PredictPricesStep(context)
-    prices_df = predict_step.transform((pricer, state_space))
-
-    return prices_df
-
-
-def full_pipeline(context: PipelineContext):
-    """
-    Complete end-to-end pipeline: data extraction -> elasticity -> pricing
-    Returns: (elasticity_df, prices_df)
-    """
-    # extract interactions
    interaction_pipe = interaction_extraction_pipeline(context)
-    interactions_df = interaction_pipe.fit_transform(None)
-
-    # extract price logs
    price_pipe = price_extraction_pipeline(context)
+
+    interactions_df = interaction_pipe.fit_transform(None)
    price_logs_df = price_pipe.fit_transform(None)
+    product_features_df = product_features_pipeline(context, interactions_df, price_logs_df)
+    print(product_features_df.to_string())

-    if interactions_df.empty or price_logs_df.empty:
-        return None, None
+    # generate optimal prices using surge rules
+    optimal_prices_df = pricing_pipeline(context, product_features_df,
+                                          high_threshold=high_threshold,
+                                          low_threshold=low_threshold,
+                                          surge_multiplier=surge_multiplier,
+                                          discount_multiplier=discount_multiplier)

-    # compute elasticity
-    elasticity_df = elasticity_computation_pipeline(
-        context,
-        interactions_df,
-        price_logs_df
-    )
+    return product_features_df, optimal_prices_df

-    if elasticity_df is None or elasticity_df.empty:
-        return elasticity_df, None

-    # generate prices
-    prices_df = pricing_pipeline(context, elasticity_df)
-
-    return elasticity_df, prices_df


 if __name__ == '__main__':
@@ -117,24 +114,25 @@ if __name__ == '__main__':
        def __init__(self, backend_url: str):
            SupabaseProvider.__init__(self)
            BackendAPIProvider.__init__(self, backend_url=backend_url)
+
+
+    class HistoricalProvider(SupabaseProvider, BackendAPIProvider):
+        def fetch_kafka_topic(self, topic: str) -> pd.DataFrame:
+            path = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/collected_data/858c61ab-0a7f-4595-ae49-33f4365517b9/"
+            interactions_file = "messages(2).json"
+            prices_file = "messages(3).json"
+
+            data = pd.read_json(path + (interactions_file if topic == "user-interactions" else prices_file))
+            data = [r['payload'] for r in data['value'].to_list()]
+            data = pd.DataFrame(data)
+            return data
+
+
    # example run
    context = PipelineContext(
-        provider=Provider(backend_url="http://localhost:5000"),
+        provider=HistoricalProvider(),
        store_mode='hotel',
-        # 15 min not month
-        window_size='15min',
    )

-    elasticity_df, prices_df = full_pipeline(context)
-
-    if elasticity_df is not None and not elasticity_df.empty:
-        print("Elasticity Estimates:")
-        print(elasticity_df.to_string(index=False))
-    else:
-        print("No elasticity estimates computed.")
-
-    if prices_df is not None and not prices_df.empty:
-        print("\nPredicted Prices:")
-        print(prices_df.to_string(index=False))
-    else:
-        print("No prices predicted.")
+    product_features, prices = full_pipeline(context)
+    print(prices.to_string())