"""Pipelines for interaction/price extraction, feature joining and surge pricing."""
from sklearn.pipeline import Pipeline
import pandas as pd
from procesing.context import PipelineContext
from procesing.providers import SupabaseProvider, BackendAPIProvider
from procesing.steps import (
    FetchInteractionsStep,
    FetchPriceLogsStep,
    FetchExperimentsStep,
    JoinExperimentsStep,
    CreatePriceBucketsStep,
    AugmentEventNamesStep,
    ChunkByTimeWindowStep,
    ComputeDemandForChunksStep,
    AggregatePriceLogsStep,
    # BuildStateSpaceStep,
    FitPricingFunctionStep,
    PredictPricesStep,
    ComputeDemandStep,
    JoinProductFeaturesStep
)


def interaction_extraction_pipeline(context: PipelineContext):
    """Build the sklearn Pipeline that fetches and augments interaction data.

    The steps run in order: fetch interactions, create price buckets,
    augment event names. Every step receives the same ``context``.
    """
    return Pipeline([
        ('fetch', FetchInteractionsStep(context)),
        ('create_buckets', CreatePriceBucketsStep(context)),
        ('augment_events', AugmentEventNamesStep(context)),
    ])


def price_extraction_pipeline(context: PipelineContext):
    """Build the sklearn Pipeline that fetches price logs (single fetch step)."""
    return Pipeline([
        ('fetch', FetchPriceLogsStep(context)),
    ])


def product_features_pipeline(context: PipelineContext,
                              interactions_df: pd.DataFrame,
                              price_logs_df: pd.DataFrame):
    """Combine demand and price aggregates into one product feature table.

    Runs ``ComputeDemandStep`` on the interactions and
    ``AggregatePriceLogsStep`` on the price logs, then hands both results as a
    ``(demand, price)`` tuple to ``JoinProductFeaturesStep``.

    Args:
        context: Pipeline context shared by all steps
        interactions_df: Output of the interaction extraction pipeline
        price_logs_df: Output of the price extraction pipeline

    Returns:
        Whatever ``JoinProductFeaturesStep.transform`` returns; presumably a
        DataFrame with [productId, demand_score, price] -- TODO confirm
        against the step implementation.
    """
    demand_step = ComputeDemandStep(context)
    price_step = AggregatePriceLogsStep(context)
    join_step = JoinProductFeaturesStep(context)

    demand_data = demand_step.transform(interactions_df)
    price_data = price_step.transform(price_logs_df)
    joined_data = join_step.transform((demand_data, price_data))
    return joined_data


def _first_metadata_by_id(products) -> dict:
    """Map each product id to the ``metadata`` value of its first row.

    Returns an empty dict when ``products`` is None or lacks the ``id`` /
    ``metadata`` columns, so callers simply fall back to observed prices.
    """
    columns = getattr(products, 'columns', ())
    if 'id' not in columns or 'metadata' not in columns:
        return {}
    lookup = {}
    for prod_id, meta in zip(products['id'], products['metadata']):
        # setdefault keeps the first occurrence, matching ``.iloc[0]`` on a
        # filtered frame.
        lookup.setdefault(prod_id, meta)
    return lookup


def _resolve_base_price(meta, current_price):
    """Return the metadata ``base_price`` if usable, else ``current_price``."""
    if not isinstance(meta, dict):
        return current_price
    base_price = meta.get('base_price', current_price)
    # A key that is present but null must not reach the multiplication below.
    return current_price if base_price is None else base_price


def pricing_pipeline(context: PipelineContext, data: pd.DataFrame,
                     high_threshold: int = 10, low_threshold: int = 2,
                     surge_multiplier: float = 1.2,
                     discount_multiplier: float = 0.9) -> pd.DataFrame:
    """
    Generate product-level optimal prices using simple surge pricing rules.
    Replaces complex Bayesian curve fitting with threshold-based adjustments.

    For each product the mean ``demand_score`` and mean ``price`` are taken.
    The base price comes from the product's metadata (``base_price`` key) when
    available, otherwise the mean observed price is used. The base price is
    multiplied by ``surge_multiplier`` when demand >= ``high_threshold``, by
    ``discount_multiplier`` when demand <= ``low_threshold``, and left
    unchanged otherwise.

    Args:
        context: Pipeline context; ``context.products`` is read as a table
            with ``id`` and ``metadata`` columns
        data: DataFrame with [productId, demand_score, price]
        high_threshold: Demand threshold for surge pricing (default 10)
        low_threshold: Demand threshold for discounts (default 2)
        surge_multiplier: Price multiplier for high demand (default 1.2 = +20%)
        discount_multiplier: Price multiplier for low demand (default 0.9 = -10%)

    Returns:
        DataFrame with [productId, current_price, base_price, optimal_price,
        demand_score], one row per product in order of first appearance.
        An empty, column-less DataFrame is returned when ``data`` is empty or
        has no ``productId`` column.
    """
    if data.empty or 'productId' not in data.columns:
        return pd.DataFrame()

    # Build the id -> metadata lookup once instead of filtering the products
    # table for every product.
    metadata_by_id = _first_metadata_by_id(context.products)

    results = []
    # sort=False keeps first-appearance order; rows with a null productId are
    # dropped by groupby, which matches an equality filter never selecting them.
    for pid, prod_data in data.groupby('productId', sort=False):
        demand = prod_data["demand_score"].mean()
        current_price = prod_data["price"].mean()

        # get base price from metadata or use current price
        base_price = _resolve_base_price(metadata_by_id.get(pid), current_price)

        # apply surge rules
        if demand >= high_threshold:
            optimal_price = base_price * surge_multiplier
        elif demand <= low_threshold:
            optimal_price = base_price * discount_multiplier
        else:
            optimal_price = base_price

        results.append({
            'productId': pid,
            'current_price': current_price,
            'base_price': base_price,
            'optimal_price': optimal_price,
            'demand_score': demand
        })

    return pd.DataFrame(results)


def full_pipeline(context: PipelineContext, high_threshold: int = 10,
                  low_threshold: int = 2, surge_multiplier: float = 1.2,
                  discount_multiplier: float = 0.9):
    """
    Complete end-to-end pipeline:
    data extraction -> demand/price aggregation -> surge pricing

    Args:
        context: Pipeline context
        high_threshold: Demand threshold for surge pricing
        low_threshold: Demand threshold for discounts
        surge_multiplier: Price multiplier for high demand
        discount_multiplier: Price multiplier for low demand

    Returns:
        tuple: (product_features_df, optimal_prices_df)
        - product_features_df: [productId, demand_score, price]
        - optimal_prices_df: [productId, current_price, base_price,
          optimal_price, demand_score]
    """
    interaction_pipe = interaction_extraction_pipeline(context)
    price_pipe = price_extraction_pipeline(context)

    # The fetch steps pull their own data, so no input frame is passed in.
    interactions_df = interaction_pipe.fit_transform(None)
    price_logs_df = price_pipe.fit_transform(None)

    product_features_df = product_features_pipeline(context, interactions_df, price_logs_df)
    print(product_features_df.to_string())

    # generate optimal prices using surge rules
    optimal_prices_df = pricing_pipeline(context, product_features_df,
                                         high_threshold=high_threshold,
                                         low_threshold=low_threshold,
                                         surge_multiplier=surge_multiplier,
                                         discount_multiplier=discount_multiplier)
    return product_features_df, optimal_prices_df


if __name__ == '__main__':
    class Provider(SupabaseProvider, BackendAPIProvider):
        """Provider combining the Supabase and backend API providers."""

        def __init__(self, backend_url: str):
            SupabaseProvider.__init__(self)
            BackendAPIProvider.__init__(self, backend_url=backend_url)

    class HistoricalProvider(SupabaseProvider, BackendAPIProvider):
        """Provider that serves Kafka topics from locally collected JSON dumps."""

        def fetch_kafka_topic(self, topic: str) -> pd.DataFrame:
            """Load a recorded topic dump and return its message payloads.

            The "user-interactions" topic maps to the interactions dump; every
            other topic name maps to the price-log dump.
            """
            path = "/home/velocitatem/Documents/Projects/PHANTOM/experiments/collected_data/858c61ab-0a7f-4595-ae49-33f4365517b9/"
            interactions_file = "messages(2).json"
            prices_file = "messages(3).json"
            # Read exactly one file; the earlier per-topic reads were discarded
            # or overwritten and only cost extra I/O.
            file_name = interactions_file if topic == "user-interactions" else prices_file
            raw = pd.read_json(path + file_name)
            payloads = [r['payload'] for r in raw['value'].to_list()]
            return pd.DataFrame(payloads)

    # example run
    context = PipelineContext(
        provider=HistoricalProvider(),
        store_mode='hotel',
    )

    product_features, prices = full_pipeline(context)
    print(prices.to_string())