mirror of
https://github.com/velocitatem/PHANTOM.git
synced 2026-05-31 08:33:36 +00:00
Airflow addition (#28)
* introducing airflow to run pipeline * chore: updating dag with upload to registry * introducing complete provider (non refactored and noisy) * chore: removing old shit * generic pricing baselines * feature: super simple model registry (to be updated maybe third party OS software) * chore: refactoring the providers docker config and requirements * chore: refactored and broke down components (breaking) * exporting all * local pipeline execution working * fix: fixing import structures from nonrelativistic * chore: enables cross comm pickling with fully e2e pipeline compilation * docs: what the pipeline is like now * pipelines local running and pipeline high level definition * cleaning old pipeline and vectorization * leaked but fixing, not so important * test: started with pipeline step testing * chore: cleaning up provider of prices * test: extra tests with semantic meaning checks * migrating pricers * feature: introducing pricing predictors (pricers) * chore: e2e is done with new pipeline * extra session feature extraction * feature: experimental session pricer and metrics(vibe) * chore: redefined and connected pricers (#29)
This commit is contained in:
committed by
GitHub
parent
2a0e44ab24
commit
ad9423bf59
34
experiments/procesing/steps/join.py
Executable file
34
experiments/procesing/steps/join.py
Executable file
@@ -0,0 +1,34 @@
|
||||
import pandas as pd
|
||||
from procesing.steps.base import BaseContextStep
|
||||
|
||||
class JoinExperimentsStep(BaseContextStep):
    """Pipeline step that attaches experiment metadata to interaction rows."""

    def transform(self, data: tuple):
        """Left-join experiment metadata onto the interactions dataframe.

        Args:
            data: a two-item tuple ``(interactions_df, experiments_df)``.

        Returns:
            ``interactions_df`` untouched when ``experiments_df`` is empty;
            otherwise the result of left-merging the (flattened and renamed)
            experiments frame onto ``interactions_df`` by ``experimentId``.
        """
        interactions_df, experiments_df = data

        # Nothing to join: hand the interactions back as they came in.
        if experiments_df.empty:
            return interactions_df

        # Expand the nested ``task`` field into flat columns, but only when
        # the column exists and holds at least one non-null value.
        if 'task' in experiments_df.columns:
            has_task = experiments_df['task'].notnull()
            if has_task.any():
                flattened = pd.json_normalize(experiments_df['task'].dropna())
                # json_normalize output is re-labelled with the positions of
                # the rows that actually carried a task, so the index-based
                # join below lines each flattened row up with its source row.
                flattened.index = experiments_df.index[has_task.to_numpy()]
                without_task = experiments_df.drop('task', axis=1)
                # Flattened columns that clash with existing ones receive the
                # ``_task`` suffix.
                experiments_df = without_task.join(flattened, rsuffix='_task')

        # Give the experiment columns clearer names; ``id`` becomes the
        # ``experimentId`` key used for the merge.
        column_aliases = {
            'id': 'experimentId',
            'subject_name': 'exp_subject',
            'xp_human_only': 'exp_human_only',
            'xp_market_mode': 'exp_market_mode',
            'xp_task_id': 'exp_task_id',
        }
        experiments_df = experiments_df.rename(columns=column_aliases)

        return interactions_df.merge(experiments_df, on='experimentId', how='left')
|
||||
Reference in New Issue
Block a user