mirror of
https://github.com/velocitatem/PHANTOM.git
synced 2026-05-31 16:43:36 +00:00
Airflow addition (#28)
* introducing airflow to run pipeline * chore: updating dag with upload to registry * introducing complete provider (non refactored and noisy) * chore: removing old shit * generic pricing baselines * feature: super simple model registry (to be updated maybe third party OS software) * chore: refactoring the providers docker config and requirements * chore: refactored and broke down components (breaking) * exporting all * local pipeline execution working * fix: fixing import structures from nonrelativistic * chore: enables cross comm pickling with fully e2e pipeline compilation * docs: what the pipeline is like now * pipelines local running and pipeline high level definition * cleaning old pipeline and vectorization * leaked but fixing, not so important * test: started with pipeline step testing * chore: cleaning up provider of prices * test: extra tests with semantic meaning checks * migrating pricers * feature: introducing pricing predictors (pricers) * chore: e2e is done with new pipeline * extra session feature extraction * feature: experimental session pricer and metrics (vibe) * chore: redefined and connected pricers (#29)
This commit is contained in:
committed by
GitHub
parent
2a0e44ab24
commit
ad9423bf59
@@ -130,25 +130,24 @@ class TemporalElasticityEstimator(BaseEstimator, TransformerMixin):
|
||||
|
||||
def _build_product_timeseries(self, aligned_chunks):
    """Build a time series of price/quantity observations per product.

    Each chunk's ``'demand'`` and ``'prices'`` frames are inner-joined on
    ``productId`` and stamped with the chunk's ``'window_start'``. All
    chunks are then concatenated and grouped by product in a single pass
    rather than iterating row by row.

    Args:
        aligned_chunks: Iterable of mappings with keys ``'demand'`` and
            ``'prices'`` (DataFrames sharing a ``productId`` column) and
            ``'window_start'``. After the join the frame must contain
            ``price`` and ``demand_score`` columns (presumably ``price``
            comes from ``'prices'`` and ``demand_score`` from
            ``'demand'`` -- confirm against the caller).

    Returns:
        dict mapping each ``productId`` to a list of records of the form
        ``{'timestamp': ..., 'price': ..., 'quantity': ...}``, in chunk
        order. Returns ``{}`` when there are no chunks.
    """
    wanted = ['productId', 'timestamp', 'price', 'demand_score']

    frames = []
    for chunk in aligned_chunks:
        # Inner join: only products present in both frames contribute.
        joined = chunk['demand'].merge(
            chunk['prices'], on='productId', how='inner'
        )
        joined['timestamp'] = chunk['window_start']
        frames.append(joined[wanted])

    # pd.concat raises on an empty list, so bail out early.
    if not frames:
        return {}

    # Rename once up front instead of once per group.
    combined = pd.concat(frames, ignore_index=True).rename(
        columns={'demand_score': 'quantity'}
    )
    record_cols = ['timestamp', 'price', 'quantity']
    return {
        pid: group[record_cols].to_dict('records')
        for pid, group in combined.groupby('productId')
    }
|
||||
|
||||
|
||||
Reference in New Issue
Block a user