mirror of
https://github.com/velocitatem/PHANTOM.git
synced 2026-05-31 08:33:36 +00:00
scary
This commit is contained in:
20
Makefile
20
Makefile
@@ -44,7 +44,7 @@ SWEEP_ENV_LOAD = set -a; [ -f "$(SWEEP_ENV_FILE)" ] && . "$(SWEEP_ENV_FILE)" ||
|
||||
|
||||
.PHONY: help
|
||||
help:
|
||||
@echo "pdf.build pdf.watch pdf.clean pdf.genpop pdf.genpop.watch pdf.arxiv | test.backend test.e2e test.all | web.dev | install | train | benchmark | benchmark.simple | benchmark.agent | train.agent | train.bootstrap | stats.lines | manim.render manim.render.all"
|
||||
@echo "pdf.build pdf.watch pdf.clean pdf.genpop pdf.genpop.watch pdf.arxiv | test.backend test.e2e test.all | web.dev | install | train | benchmark | benchmark.simple | benchmark.agent | train.agent | train.bootstrap | stats.lines | manim.defense manim.defense.hq manim.render manim.render.full manim.render.poster manim.render.appendix manim.render.all"
|
||||
@echo "backend.server backend.provider backend.worker | platform.up platform.down platform.logs | docker.train.publish"
|
||||
@echo "data.pull data.push data.whoclicked.publish | study.margin-erosion study.margin-erosion.quick study.margin-erosion.plot"
|
||||
@echo "tpu.ray.bootstrap tpu.ray.deps tpu.ray.verify tpu.ray.teardown"
|
||||
@@ -235,9 +235,25 @@ count-lines:
|
||||
all:
|
||||
@$(NX) run paper:build
|
||||
|
||||
.PHONY: manim.render manim.render.all
|
||||
.PHONY: manim.defense manim.defense.hq manim.render manim.render.full manim.render.poster manim.render.appendix manim.render.all
|
||||
# Main defense reel (paper/defense/manim/render_defense); uses paper/defense/.venv when present
|
||||
manim.defense:
|
||||
@cd paper/defense/manim && ./render_defense full
|
||||
|
||||
manim.defense.hq:
|
||||
@cd paper/defense/manim && ./render_defense full --quality qh
|
||||
|
||||
manim.render:
|
||||
@$(NX) run manim:render
|
||||
|
||||
manim.render.full:
|
||||
@$(NX) run manim:render-full
|
||||
|
||||
manim.render.poster:
|
||||
@$(NX) run manim:render-poster
|
||||
|
||||
manim.render.appendix:
|
||||
@$(NX) run manim:render-appendix
|
||||
|
||||
manim.render.all:
|
||||
@$(NX) run manim:render-all
|
||||
|
||||
@@ -1,2 +0,0 @@
|
||||
manim>=0.18,<1
|
||||
numpy>=1.24
|
||||
@@ -304,7 +304,7 @@ $\mathcal{A}_{\text{filter}}$ & \texttt{search}, \texttt{filter\_date}, \texttt{
|
||||
This partition enables the weight function $\omega$ from Eq.~\ref{eq:qhat} to assign category-specific signal strengths, with $\omega(\mathcal{A}_{\text{cart}}) > \omega(\mathcal{A}_{\text{dwell}}) > \omega(\mathcal{A}_{\text{nav}}) > \omega(\mathcal{A}_{\text{filter}})$ reflecting decreasing commitment.
|
||||
We acknowledge that this ordering is a strong assumption in the weighting; we motivate the scale of each weight by the observed per-category divergence between the behavioral profiles.
|
||||
In the simulator baseline this order is encoded with a compact fixed scale: cart $=4.0$, dwell $=2.0$, nav $=1.0$, filter $=0.5$. Unknown actions are mapped by prefix heuristics to the nearest category.
|
||||
We back this up by saying that each weight was assigned by observing an initial small dataset and computing KL divergence between each interaction type; the ones with the highest divergence receive a proportionately high weight in our demand estimation.
|
||||
We justify this as follows: each weight was assigned by observing an initial small dataset and computing the KL divergence for each interaction type; the types with the highest divergence receive a proportionately high weight in our demand estimation. Following the observed ordering of the divergences, we double the weight at each step, ascending from the lowest weight of $0.5$ for the rare filtering operations.
|
||||
|
||||
The metadata record $\mu$ varies by action type. For product views, $\mu$ contains the observed price $p_{\text{obs}}$ and product attributes. For dwell events, $\mu$ includes the element text and accumulated hover duration. This heterogeneous structure is captured via a schema-on-read approach in our Kafka ingestion pipeline, where events are validated against type-specific schemas before storage.
|
||||
|
||||
|
||||
@@ -73,35 +73,35 @@ In our complete training runs we logged $\approx 180$ days of net compute time.
|
||||
|
||||
\begin{figure}[ht]
|
||||
\centering
|
||||
\input{chapters/figures/results/includes/final/final_focus_revenue_by_alpha.tex}
|
||||
\input{chapters/figures/results/includes/final_focus_revenue_by_alpha.tex}
|
||||
\caption{Revenue curves by contamination for the final cohort. The baseline remains above the defended curve in most cells, but the gap narrows in the high-contamination region.}
|
||||
\label{fig:final_focus_revenue_by_alpha}
|
||||
\end{figure}
|
||||
|
||||
\begin{figure}[ht]
|
||||
\centering
|
||||
\input{chapters/figures/results/includes/final/final_focus_coi_by_alpha.tex}
|
||||
\input{chapters/figures/results/includes/final_focus_coi_by_alpha.tex}
|
||||
\caption{COI level curves by contamination for the final cohort. The shaded band marks the per-$\alpha$ gap between defended and baseline policies.}
|
||||
\label{fig:final_focus_coi_by_alpha}
|
||||
\end{figure}
|
||||
|
||||
\begin{figure}[ht]
|
||||
\centering
|
||||
\input{chapters/figures/results/includes/final/final_focus_coi_preservation_grid.tex}
|
||||
\input{chapters/figures/results/includes/final_focus_coi_preservation_grid.tex}
|
||||
\caption{COI preservation by product count at the contamination endpoints ($\alpha=0.0$ and $\alpha=1.0$). Bars report defended-minus-baseline mean COI level, with the zero line separating preservation from erosion.}
|
||||
\label{fig:final_focus_coi_preservation_grid}
|
||||
\end{figure}
|
||||
|
||||
\begin{figure}[ht]
|
||||
\centering
|
||||
\input{chapters/figures/results/includes/final/final_focus_revenue_delta.tex}
|
||||
\input{chapters/figures/results/includes/final_focus_revenue_delta.tex}
|
||||
\caption{Defended-minus-baseline revenue delta over contamination for the final cohort. The strongest high-contamination deviation begins at $\alpha=0.7$, followed by recovery toward near parity by $\alpha=1.0$.}
|
||||
\label{fig:final_focus_revenue_delta}
|
||||
\end{figure}
|
||||
|
||||
\begin{figure}[ht]
|
||||
\centering
|
||||
\input{chapters/figures/results/includes/final/final_focus_risk_deltas.tex}
|
||||
\input{chapters/figures/results/includes/final_focus_risk_deltas.tex}
|
||||
\caption{Defended-minus-baseline leakage and volatility deltas for the final cohort. Leakage remains lower for the defended policy across the full contamination range.}
|
||||
\label{fig:final_focus_risk_deltas}
|
||||
\end{figure}
|
||||
|
||||
@@ -21,4 +21,6 @@ Now we very explicitly mention what we contribute in this paper:
|
||||
|
||||
\subsection{Future Works and Next Steps}
|
||||
|
||||
During the eight months of research dedicated to this work, a plethora of opportunities and industry gaps were identified; sadly, the majority of them could not be addressed directly.
|
||||
In our effort to tackle this work we imposed a set of constraints which we hope to relax in future iterations, and we hope that some of these will be addressed in industry. The first of these constraints is the weighting of different actions within the demand estimation, which we would ideally obtain through a learned method. Next, the assumption of perfectly alternating turns between the platform and the market calls for a fixed-length, non-strictly alternating state definition with a history of actions, which could allow for the development of multi-agent or multi-platform simulation. In our simulation we also assume a non-perishable supply of items, which creates the biggest sim-to-real gap in our system. Finally, we would like to remove the intra-session stationarity of the contamination parameter in order to create high-fidelity non-stationarity within a single evaluation window.
|
||||
|
||||
For deployment of this system, we advise collecting a larger sample of human baselines, complementing it with simulated agentic sessions, and being mindful of the matrix scaling for very large catalog sizes.
|
||||
|
||||
@@ -1,10 +0,0 @@
|
||||
{
|
||||
"runs": 340,
|
||||
"tiers": 5,
|
||||
"alphas": 6,
|
||||
"status": "ok",
|
||||
"mean_tier_revenue_robust": 190714.62212212436,
|
||||
"mean_tier_revenue_no_robust": 197371.17216609977,
|
||||
"mean_tier_revenue_delta": -6656.5500439754105,
|
||||
"mean_tier_revenue_delta_pct": -3.3726050116242514
|
||||
}
|
||||
@@ -1,31 +0,0 @@
|
||||
tier,alpha,runs_robust,runs_no_robust,eval_revenue_mean_delta,eval_revenue_mean_delta_pct,eval_reward_mean_delta,eval_reward_mean_delta_pct,eval_coi_level_mean_delta,eval_coi_level_mean_delta_pct,eval_margin_mean_delta,eval_margin_mean_delta_pct,objective_score_delta,objective_score_delta_pct,train_alpha_adv_delta,train_alpha_adv_delta_pct
|
||||
dqn,0.0,5.0,2.0,-31308.987414117495,-8.73651226889534,-1909.7427407095092,-0.5742991901121623,-2.8982436567700063,-2.1108702433020436,-0.001972064237093285,-0.2116777198290971,-1909.7427407095092,-0.5742991901121623,,
|
||||
dqn,0.1,8.0,4.0,-7723.542755668925,-2.2789188721535494,-74239.37371836061,-21.063854618469847,1.7435833801418141,1.2859365583872486,0.0011891962142838164,0.1278074871971924,-74239.37371836061,-21.063854618469847,0.17619791666666657,176.19791666666694
|
||||
dqn,0.25,7.0,3.0,-12344.82818986749,-3.7035466052614323,93154.03627578515,36.06691230407512,0.03214544949867104,0.023426184113378143,1.763733457238459e-05,0.001893256490383175,93154.03627578515,36.06691230407512,0.14530952380952394,58.12380952380958
|
||||
dqn,0.4,5.0,10.0,-7816.300706216833,-2.4694340725162824,-42362.74668471434,-13.411888482380219,0.6251272343707797,0.4579446603861758,0.0002750615520492605,0.02953644634355915,-42362.74668471434,-13.411888482380219,0.09856666666666747,24.64166666666691
|
||||
dqn,0.6,5.0,4.0,-16150.011887742497,-5.347485987139731,-28508.74710866122,-10.151356300001888,-0.63306323164079,-0.46056970247177387,-0.00034537433455417155,-0.0370668515552649,-28508.74710866122,-10.151356300001888,0.1361999999999981,22.699999999999644
|
||||
dqn,0.8,7.0,6.0,-18191.8826663699,-6.440527544692988,-55296.94441124235,-20.19273590083627,-0.796733634735034,-0.579832425016392,-0.0006423984775592029,-0.0689476165584585,-55296.94441124235,-20.19273590083627,0.1532857142857158,19.160714285714512
|
||||
linear,0.0,9.0,8.0,-14967.67388588126,-4.273413942959129,-20107.23171681742,-6.60039931288617,-0.06127790826209889,-0.04564810574240612,-7.607744079518586e-05,-0.008177885913528719,-20107.23171681742,-6.60039931288617,,
|
||||
linear,0.1,3.0,5.0,-24531.399901538738,-7.171831328305365,-96669.7835552101,-26.44920711447249,-0.3680976907859872,-0.2733723058172187,-0.0002515287835096469,-0.02702956778346356,-96669.7835552101,-26.44920711447249,,
|
||||
linear,0.25,6.0,9.0,-14840.859479571285,-4.520682292638562,-26510.179456423968,-8.033117756667396,-0.13734776448131925,-0.10212641096230607,-9.41162442338328e-05,-0.010115001392981545,-26510.179456423968,-8.033117756667396,,
|
||||
linear,0.4,4.0,11.0,-17196.7642560167,-5.486915251242723,-74520.10209817477,-25.042311510043184,0.12217076984330788,0.09098828726103136,0.00010713887099822461,0.011516865671259795,-74520.10209817477,-25.042311510043184,,
|
||||
linear,0.6,5.0,3.0,-14284.06615788641,-4.854766876637072,38417.71856593515,14.088596762512362,0.24251461234271687,0.1806530855220358,0.0002606811969937395,0.028024824619509187,38417.71856593515,14.088596762512362,,
|
||||
linear,0.8,4.0,11.0,-10840.488575784548,-3.933600919557566,15749.581078662042,6.447651726824251,0.028051260535562506,0.020876236575910773,5.361882659971062e-05,0.005763158099097226,15749.581078662042,6.447651726824251,,
|
||||
qtable,0.0,9.0,8.0,-18644.457288398524,-8.15323701554329,32993.42568058451,20.675688115613053,10.369779227648095,10.682768960780463,0.018566897519637582,2.0803084179092814,32993.42568058451,20.675688115613053,0.11839814814814797,
|
||||
qtable,0.1,6.0,5.0,-12549.400855549495,-4.616991193742389,-37207.79701261924,-15.336047254435487,0.0884057957559321,0.07703761042583206,-0.01127789819771663,-1.2272540823820444,-37207.79701261924,-15.336047254435487,0.07577777777777787,75.77777777777803
|
||||
qtable,0.25,6.0,5.0,-1534.3527429780224,-0.5456640130847226,18433.43663451099,7.304472653867784,-0.5776125938941306,-0.45734160960552755,-0.003316338490628068,-0.3584028328803385,18433.43663451099,7.304472653867784,0.1181458333333334,47.258333333333354
|
||||
qtable,0.4,8.0,6.0,-15146.258176090778,-5.274860187729517,-37364.22587794208,-13.005651205148677,0.4611471727478005,0.3629050099230144,0.0071046453227539,0.7751478467862876,-37364.22587794208,-13.005651205148677,0.11010416666666772,27.52604166666698
|
||||
qtable,0.6,6.0,6.0,-9577.578548656049,-3.9322693501816666,-19088.152339068736,-9.571307395166029,0.9081750157567683,0.7495917946306662,0.0015520804425310786,0.16838348372043557,-19088.152339068736,-9.571307395166029,0.16983333333333228,28.305555555555333
|
||||
qtable,0.8,5.0,2.0,-52751.680936846446,-19.699089872409548,-16508.209313987172,-7.589601869470744,-15.022454081083623,-11.215398490282094,-0.007791824761087751,-0.8384414846099099,-16508.209313987172,-7.589601869470744,0.11120000000000174,13.900000000000245
|
||||
static,0.0,5.0,6.0,-4782.871053113384,-5.233544525848519,14411.4689779756,25.538141347978577,1.307060701942973,1.8731997380823568,0.002537468952847566,0.2911381045328444,14411.4689779756,25.538141347978577,,
|
||||
static,0.1,8.0,5.0,1629.4524528499896,1.880088900553112,-5347.078589385725,-8.14812684380662,0.3600324838305795,0.5019134064795009,-4.6492644957929485e-05,-0.005316014641356001,-5347.078589385725,-8.14812684380662,,
|
||||
static,0.25,5.0,6.0,-9938.662276761897,-10.398087633377964,-23616.087243780566,-27.701108621456626,-3.0513860773271233,-4.099238223547561,-0.003519771479853273,-0.40113716461596144,-23616.087243780566,-27.701108621456626,,
|
||||
static,0.4,3.0,4.0,1850.8400595222774,2.1912497828943436,15058.659457798465,23.67199439061036,3.669612467486587,5.430169778169349,0.006763447803564415,0.7804393835882188,15058.659457798465,23.67199439061036,,
|
||||
static,0.6,6.0,5.0,1038.893948415236,1.2765037688226162,-6062.864079504681,-9.363144945348399,-1.712609061865976,-2.3996341009364213,-0.0042285583442709385,-0.48362088973179423,-6062.864079504681,-9.363144945348399,,
|
||||
static,0.8,3.0,7.0,2696.6340631967323,3.6826150812750567,149.22406835677975,0.27280281303997084,0.8491716126507072,1.2427748744725668,0.0032786525965587954,0.3777595573932637,149.22406835677975,0.27280281303997084,,
|
||||
surge,0.0,6.0,6.0,-606.73760243367,-5.066579306500225,-244.17585425326251,-5.525800641331023,0.014874931199557295,0.09186560988877175,0.0019308940532419272,0.4471794260021321,-244.17585425326251,-5.525800641331023,,
|
||||
surge,0.1,2.0,5.0,169.78743573408792,1.446343107913299,-1012.7706974660168,-20.02053666691211,-0.14459518037699226,-0.864651254901582,-0.0018650458785858248,-0.4260349899970559,-1012.7706974660168,-20.02053666691211,,
|
||||
surge,0.25,10.0,7.0,-128.20993816584632,-1.1276930411162496,-81.21373487263281,-1.7081453033360994,0.3008506477195141,1.839047728806548,0.0030750148302954305,0.7102446987902812,-81.21373487263281,-1.7081453033360994,,
|
||||
surge,0.4,6.0,6.0,-473.03722764431404,-4.297928307550563,28.557452243338048,0.6755106104955642,-0.5027452173053764,-3.072002360121898,-0.005581380442163164,-1.288152985482699,28.557452243338048,0.6755106104955642,,
|
||||
surge,0.6,2.0,5.0,307.79436325796996,3.0356727142643067,2060.57396030564,63.382050333909866,0.2339650444065704,1.438519400758399,0.001302270025389629,0.30077697380833807,2060.57396030564,63.382050333909866,,
|
||||
surge,0.8,3.0,3.0,423.15386247993047,4.372210191290083,1117.0942083304312,34.86182570616373,0.8971464536957541,5.327339899805159,0.007068630716831503,1.6094191039618562,1117.0942083304312,34.86182570616373,,
|
||||
|
@@ -1,61 +0,0 @@
|
||||
tier,alpha,mode,runs,eval_revenue_mean_mean,eval_revenue_mean_std,eval_reward_mean_mean,eval_reward_mean_std,eval_coi_level_mean_mean,eval_coi_level_mean_std,eval_margin_mean_mean,eval_margin_mean_std,objective_score_mean,objective_score_std,train_alpha_adv_mean,train_alpha_adv_std
|
||||
dqn,0.0,no_robust,2,358369.40933039243,3531.782519351935,332534.46523867303,114183.5587841961,137.30089123035202,0.8184776440325546,0.9316352418598786,0.0006839003676302996,332534.46523867303,114183.5587841961,,
|
||||
dqn,0.0,robust,5,327060.42191627494,24311.17412598574,330624.7224979635,62834.39223547943,134.40264757358202,6.160000643680792,0.9296631776227853,0.004262039730140749,330624.7224979635,62834.39223547943,0.17835000000000004,0.08829347371125472
|
||||
dqn,0.1,no_robust,4,338912.58043645386,19584.736810155388,352449.13650924934,34076.74819101191,135.58860029055563,3.4055508991301524,0.9304589585186211,0.0023438665484978773,352449.13650924934,34076.74819101191,0.0999999999999998,0.0
|
||||
dqn,0.1,robust,8,331189.03768078494,8060.912085646968,278209.7627908887,57861.69545853692,137.33218367069745,0.43113256118808096,0.931648154732905,0.000296560958972609,278209.7627908887,57861.69545853692,0.2761979166666664,0.09826648189130198
|
||||
dqn,0.25,no_robust,3,333324.4996115304,6101.717861804452,258281.15112936878,46772.05216097596,137.2201692904545,0.9866477887862672,0.9315871706751672,0.0006356053229300815,258281.15112936878,46772.05216097596,0.25,0.0
|
||||
dqn,0.25,robust,7,320979.6714216629,7345.8761269427705,351435.18740515393,40320.63699261721,137.25231473995316,0.3527287960309152,0.9316048080097395,0.0002575240668471541,351435.18740515393,40320.63699261721,0.39530952380952394,0.073021206240698
|
||||
dqn,0.4,no_robust,10,316521.94295076875,3631.1820920182718,315859.66987697606,59129.03566963754,136.50715652926755,0.5085743959240285,0.931261495881483,0.00031280530251053175,315859.66987697606,59129.03566963754,0.3999999999999993,0.0
|
||||
dqn,0.4,robust,5,308705.6422445519,10654.571556448245,273496.9231922617,68868.59270778317,137.13228376363833,0.9543108715306617,0.9315365574335323,0.0006302636717132419,273496.9231922617,68868.59270778317,0.49856666666666677,0.05745573175159429
|
||||
dqn,0.6,no_robust,4,302011.2988903938,2354.1141598720183,280836.828756133,58683.00124997926,137.4522093492651,0.4692723362517602,0.9317606434396914,0.0003317518021682495,280836.828756133,58683.00124997926,0.600000000000001,0.0
|
||||
dqn,0.6,robust,5,285861.2870026513,10386.571631344234,252328.08164747176,59388.56063758225,136.8191461176243,1.0629203361893034,0.9314152691051373,0.0005692783702932289,252328.08164747176,59388.56063758225,0.7361999999999991,0.07108625433623189
|
||||
dqn,0.8,no_robust,6,282459.51189759385,2625.018247527438,273845.72691287595,66378.16690732416,137.4075681801531,0.29728950101826707,0.9317196295169007,0.00022799290978965786,273845.72691287595,66378.16690732416,0.7999999999999985,0.0
|
||||
dqn,0.8,robust,7,264267.62923122395,6771.288971321149,218548.7825016336,50043.2009443344,136.61083454541807,1.2319662937254596,0.9310772310393415,0.0010118564779437284,218548.7825016336,50043.2009443344,0.9532857142857143,0.04709817507333055
|
||||
linear,0.0,no_robust,8,350250.9723061577,3156.286820918861,304636.59490360576,71682.88027353655,134.2397614654424,0.32611787466946035,0.9302824910938235,0.00024020749661685483,304636.59490360576,71682.88027353655,,
|
||||
linear,0.0,robust,9,335283.29842027643,7707.594869976611,284529.36318678834,55524.58819004573,134.1784835571803,0.4477314164684001,0.9302064136530284,0.00034781034181738526,284529.36318678834,55524.58819004573,,
|
||||
linear,0.1,no_robust,5,342052.1032713031,2576.546352056584,365492.17954557994,44890.93522299766,134.65068807375954,0.2181027640393531,0.930569018064469,0.00014058935916940913,365492.17954557994,44890.93522299766,,
|
||||
linear,0.1,robust,3,317520.7033697644,4796.580459456527,268822.39599036984,39256.421140635124,134.28259038297355,0.24570499109363475,0.9303174892809594,0.00018817899183709092,268822.39599036984,39256.421140635124,,
|
||||
linear,0.25,no_robust,9,328288.0441241802,2178.525494145428,330011.0898339667,38591.36053388808,134.48799697074742,0.2199303973026469,0.9304619997297959,0.00015341642413402035,330011.0898339667,38591.36053388808,,
|
||||
linear,0.25,robust,6,313447.18464460893,11811.426711620714,303500.9103775427,63358.917144214036,134.3506492062661,0.2947034403278951,0.9303678834855621,0.00021446628431268986,303500.9103775427,63358.917144214036,,
|
||||
linear,0.4,no_robust,11,313414.0672597746,1982.9537556159262,297576.7714904776,69396.90446617964,134.2708754290745,0.3062093691351849,0.9302780292522507,0.00023067974755288992,297576.7714904776,69396.90446617964,,
|
||||
linear,0.4,robust,4,296217.3030037579,5109.898340355844,223056.66939230284,38293.73688466607,134.3930461989178,0.12347753686382154,0.9303851681232489,7.324605809708878e-05,223056.66939230284,38293.73688466607,,
|
||||
linear,0.6,no_robust,3,294227.64307441004,2081.9176570448135,272686.62176604365,66672.50905805513,134.24327165069943,0.30764332256042104,0.9301795837547151,0.00020453921786790446,272686.62176604365,66672.50905805513,,
|
||||
linear,0.6,robust,5,279943.5769165236,9866.031719660255,311104.3403319788,28363.930707781863,134.48578626304214,0.21280262186464388,0.9304402649517088,0.00020533894868120649,311104.3403319788,28363.930707781863,,
|
||||
linear,0.8,no_robust,11,275586.89347174135,1618.038877505867,244268.4832547461,56201.44465269986,134.36933631960773,0.2845660213184439,0.9303723007028001,0.00017640716421186918,244268.4832547461,56201.44465269986,,
|
||||
linear,0.8,robust,4,264746.4048959568,7976.6279174956235,260018.06433340814,57942.49882730146,134.3973875801433,0.31511916357643405,0.9304259195293998,0.00023606570471334208,260018.06433340814,57942.49882730146,,
|
||||
qtable,0.0,no_robust,8,228675.52179404112,103199.70453252994,159575.94976328663,95848.81008103945,97.07014413321637,33.0637115678536,0.8925069648229078,0.04890522141482132,159575.94976328663,95848.81008103945,0.0,0.0
|
||||
qtable,0.0,robust,9,210031.0645056426,84361.3834579348,192569.37544387113,116824.7880426837,107.43992336086447,21.41128645838254,0.9110738623425454,0.019188350719133364,192569.37544387113,116824.7880426837,0.11839814814814797,0.061909456985161225
|
||||
qtable,0.1,no_robust,5,271809.0706466638,14898.209045050968,242616.60384397948,49181.45526408063,114.75666919996793,3.461383158930426,0.9189538140159812,0.002294693249439748,242616.60384397948,49181.45526408063,0.0999999999999998,0.0
|
||||
qtable,0.1,robust,6,259259.66979111428,102995.29934229614,205408.80683136024,94155.1845420674,114.84507499572386,36.206421837506966,0.9076759158182646,0.048591979839360346,205408.80683136024,94155.1845420674,0.17577777777777767,0.06720562696899951
|
||||
qtable,0.25,no_robust,5,281190.01916657295,70274.10208723843,252358.2126733039,129868.46825082717,126.29784427276161,15.368804047323954,0.9253103453385114,0.009044883517550522,252358.2126733039,129868.46825082717,0.25,0.0
|
||||
qtable,0.25,robust,6,279655.6664235949,93056.2549557545,270791.6493078149,116021.46257259768,125.72023167886748,26.760714047253796,0.9219940068478834,0.022785695882060884,270791.6493078149,116021.46257259768,0.3681458333333334,0.08845114686619042
|
||||
qtable,0.4,no_robust,6,287140.4669895195,32698.16434426399,287292.23388022534,83855.95000252876,127.07104066863859,9.200301166154173,0.9165535777734913,0.01306001923887748,287292.23388022534,83855.95000252876,0.3999999999999993,0.0
|
||||
qtable,0.4,robust,8,271994.2088134287,79259.3185780895,249928.00800228326,88265.30801790548,127.53218784138639,23.406428094683015,0.9236582230962452,0.020073747007871224,249928.00800228326,88265.30801790548,0.510104166666667,0.09294655989347765
|
||||
qtable,0.6,no_robust,6,243563.64469828535,67006.60707045678,199430.98211127534,79119.52886604435,121.15594411011905,17.91243944823949,0.9217533740470492,0.011558797825966702,199430.98211127534,79119.52886604435,0.600000000000001,0.0
|
||||
qtable,0.6,robust,6,233986.0661496293,43155.478617087436,180342.8297722066,48117.79957836251,122.06411912587582,12.160951090203252,0.9233054544895802,0.006840854872863436,180342.8297722066,48117.79957836251,0.7698333333333333,0.09107066853090896
|
||||
qtable,0.8,no_robust,2,267787.4017455507,1552.038101264713,217510.87340156303,45358.788584678456,133.9448981157492,0.47346860040111405,0.9293224278749692,0.0002998116010539045,217510.87340156303,45358.788584678456,0.7999999999999985,0.0
|
||||
qtable,0.8,robust,5,215035.72080870424,32869.73253165852,201002.66408757586,63247.67956376057,118.92244403466557,8.586916805142152,0.9215306031138815,0.004644709320891907,201002.66408757586,63247.67956376057,0.9112000000000002,0.07381653307732307
|
||||
static,0.0,no_robust,6,91388.75248869567,13415.65534300268,56431.15832748852,8525.098185703384,69.77689967440658,3.670744870085874,0.8715688236409825,0.005831496806767582,56431.15832748852,8525.098185703384,,
|
||||
static,0.0,robust,5,86605.88143558228,7614.909395960895,70842.62730546412,8033.737230392738,71.08396037634955,3.6802889678420283,0.8741062925938301,0.005083911544334936,70842.62730546412,8033.737230392738,,
|
||||
static,0.1,no_robust,5,86668.90445290186,8037.955688932984,65623.40881389238,19329.448262530004,71.73199185012882,4.199046495412734,0.874577067494122,0.006610505646022198,65623.40881389238,19329.448262530004,,
|
||||
static,0.1,robust,8,88298.35690575185,9576.838833058617,60276.33022450666,13359.490452744656,72.0920243339594,6.7706096714767865,0.8745305748491641,0.010083585815241344,60276.33022450666,13359.490452744656,,
|
||||
static,0.25,no_robust,6,95581.63603909909,8345.698435455577,85253.22060752509,13111.526873622026,74.43788116042678,2.1078820386097368,0.8774483618896327,0.0037254791853004897,85253.22060752509,13111.526873622026,,
|
||||
static,0.25,robust,5,85642.97376233719,9472.880627242153,61637.13336374452,15937.429780623212,71.38649508309966,4.0264905454627264,0.8739285904097794,0.005323853359397925,61637.13336374452,15937.429780623212,,
|
||||
static,0.4,no_robust,4,84465.04245981346,12101.831388745604,63613.81812329075,7778.361846092061,67.5782271530322,3.9088888968092,0.8666205147756862,0.007149121199217965,63613.81812329075,7778.361846092061,,
|
||||
static,0.4,robust,3,86315.88251933573,8642.748496122398,78672.47758108922,17823.74997200773,71.24783962051879,2.790416943786253,0.8733839625792507,0.005990544453538607,78672.47758108922,17823.74997200773,,
|
||||
static,0.6,no_robust,5,81385.88962988024,12343.523894997037,64752.43216774836,23486.779472906223,71.36959177224794,5.100226704959064,0.874353948320141,0.007787250295491337,64752.43216774836,23486.779472906223,,
|
||||
static,0.6,robust,6,82424.78357829548,9831.886701625144,58689.56808824368,12672.506035553573,69.65698271038197,3.484982360048201,0.8701253899758701,0.005917711231889304,58689.56808824368,12672.506035553573,,
|
||||
static,0.8,no_robust,7,73226.06364450825,4447.877985963851,54700.340767716196,14406.881298569717,68.32867561883204,3.68262917356943,0.8679204886788817,0.007467501164611224,54700.340767716196,14406.881298569717,,
|
||||
static,0.8,robust,3,75922.69770770498,5046.089536162847,54849.564836072976,22780.98012221352,69.17784723148274,1.5268167784698885,0.8711991412754405,0.0033278715575433297,54849.564836072976,22780.98012221352,,
|
||||
surge,0.0,no_robust,6,11975.290738176132,411.4052900076416,4418.832131346071,896.5828048394391,16.192056219479124,0.8040364003224534,0.4317940274006973,0.008271862690929055,4418.832131346071,896.5828048394391,,
|
||||
surge,0.0,robust,6,11368.553135742462,623.8217438159004,4174.6562770928085,639.9963040241264,16.20693115067868,0.9853827520149101,0.4337249214539392,0.010371668289035135,4174.6562770928085,639.9963040241264,,
|
||||
surge,0.1,no_robust,5,11739.084232858655,332.778792718381,5058.659087494994,1110.8409258976824,16.722948073839394,0.6578121995950104,0.4377682402562083,0.005683401047550787,5058.659087494994,1110.8409258976824,,
|
||||
surge,0.1,robust,2,11908.871668592743,81.41250285550258,4045.8883900289775,784.7169500268457,16.5783528934624,0.4088194924856508,0.4359031943776225,0.004531137621699143,4045.8883900289775,784.7169500268457,,
|
||||
surge,0.25,no_robust,7,11369.223138855004,236.1121240061105,4754.4980344481255,1038.0550037539617,16.359045119223275,0.3945156775653057,0.4329514652531622,0.0038762110261952457,4754.4980344481255,1038.0550037539617,,
|
||||
surge,0.25,robust,10,11241.013200689158,684.503587066406,4673.284299575493,1187.78635131025,16.65989576694279,1.0515950311117155,0.4360264800834576,0.009701952962125513,4673.284299575493,1187.78635131025,,
|
||||
surge,0.4,no_robust,6,11006.168409400554,364.6584583108646,4227.535704048808,1414.7964077877168,16.365391636138824,0.9138430058543858,0.4332855262584901,0.008024003783434592,4227.535704048808,1414.7964077877168,,
|
||||
surge,0.4,robust,6,10533.13118175624,526.0758051960169,4256.093156292146,783.7965507386594,15.862646418833448,0.7732699435426456,0.42770414581632693,0.008967505611725135,4256.093156292146,783.7965507386594,,
|
||||
surge,0.6,no_robust,5,10139.2472848498,97.448078425168,3251.037082975553,742.2100315641153,16.26429537781848,0.4432465691073604,0.4329686574409998,0.004121820888165019,3251.037082975553,742.2100315641153,,
|
||||
surge,0.6,robust,2,10447.04164810777,524.0029334247373,5311.611043281193,1808.6200710093085,16.49826042222505,0.6088756908260344,0.43427092746638946,0.007817511630542989,5311.611043281193,1808.6200710093085,,
|
||||
surge,0.8,no_robust,3,9678.259826640971,272.83530913170915,3204.3479815026553,556.8799617962688,16.840420745981802,0.4589959822922529,0.43920385308157944,0.004953937449529005,3204.3479815026553,556.8799617962688,,
|
||||
surge,0.8,robust,3,10101.413689120902,526.8318040489241,4321.442189833087,1284.166148011517,17.737567199677557,0.6586775330563983,0.44627248379841095,0.004644261847052545,4321.442189833087,1284.166148011517,,
|
||||
|
@@ -1,11 +0,0 @@
|
||||
tier,mode,runs,eval_revenue_mean_mean,eval_revenue_mean_std,eval_reward_mean_mean,eval_reward_mean_std,eval_coi_level_mean_mean,eval_coi_level_mean_std,eval_margin_mean_mean,eval_margin_mean_std,objective_score_mean,objective_score_std,train_alpha_adv_mean,train_alpha_adv_std
|
||||
dqn,no_robust,29,315185.66674813855,23538.781000060844,302576.8036266896,62951.88633145167,136.82560356086017,1.3692652218935986,0.9313739013618878,0.0009314135057224836,302576.8036266896,62951.88633145167,0.45740740740740693,0.2368477698794438
|
||||
dqn,robust,37,306875.13950902375,27585.74444520695,283724.7169827867,69843.05611741856,136.68837571992978,2.3797541654948753,0.9312171495138941,0.0016512408492580111,283724.7169827867,69843.05611741856,0.5058198198198196,0.28324483129860284
|
||||
linear,no_robust,47,315501.15296155965,27105.014861872147,298149.1730416604,67664.7308344108,134.36884359609928,0.29743647613433244,0.9303607531364,0.0002152647006739543,298149.1730416604,67664.7308344108,,
|
||||
linear,robust,31,306269.9232239004,26399.875293394463,279872.824370329,54401.104602086416,134.32737693008372,0.31909212993628877,0.9303375215162144,0.00025000448833182963,279872.824370329,54401.104602086416,,
|
||||
qtable,no_robust,32,259818.72178238883,67188.58622318009,222088.83510765125,94450.12569617687,116.84641954166946,22.42810298937963,0.9140582213134033,0.02778864370791322,222088.83510765125,94450.12569617687,0.29218749999999993,0.2559326319498438
|
||||
qtable,robust,40,244470.50673219413,78666.30912808319,216920.53697298188,93983.50987622296,118.94013969887506,23.1428303249914,0.9178608956089163,0.023827311253270544,216920.53697298188,93983.50987622296,0.4396239583333334,0.29521865862482416
|
||||
static,no_robust,33,85228.452028227,12041.415672002751,64828.579890468536,17681.280330831738,70.58818912317687,4.204964531595236,0.8721419294578765,0.007107262779462876,64828.579890468536,17681.280330831738,,
|
||||
static,robust,30,84963.18577955024,8926.291379160475,63243.76603076817,14880.924342692271,70.94358095957392,4.363134562111469,0.8730306888410219,0.006660289247744752,63243.76603076817,14880.924342692271,,
|
||||
surge,no_robust,32,11121.867310184698,809.9895800277001,4260.038064073964,1160.4282377968032,16.416108827015794,0.641203520341943,0.43413855082681374,0.006214799767130059,4260.038064073964,1160.4282377968032,,
|
||||
surge,robust,29,10994.355365953365,750.5115890942825,4448.160863178768,1000.7519971246122,16.495943148858906,0.9823026347466668,0.4347587896392907,0.009698591291108968,4448.160863178768,1000.7519971246122,,
|
||||
|
@@ -1,26 +0,0 @@
|
||||
Name,tier,alpha,mode,objective/score,eval/revenue_mean,eval/reward_mean,eval/coi_level_mean,lambda_coi,robust_radius,learning_rate,batch_size,n_steps,total_timesteps
|
||||
eager-sweep-244,dqn,0.0,no_robust,413274.4339549909,355872.06196128257,413274.4339549909,136.722140138007,0.2,0.1,0.0003,256,4096,15000
|
||||
efficient-sweep-319,linear,0.0,no_robust,410094.0151741567,353309.5198146561,410094.0151741567,134.55152038805429,0.4,0.1,0.001,128,4096,15000
|
||||
swept-sweep-422,linear,0.0,no_robust,403130.32747386186,347611.2815474988,403130.32747386186,133.8559785775022,0.4,0.3,0.0001,512,1024,15000
|
||||
decent-sweep-478,linear,0.1,no_robust,400452.36418713134,345284.5750647792,400452.36418713134,134.73082941975588,0.1,0.2,0.001,128,1024,50000
|
||||
eternal-sweep-339,linear,0.1,no_robust,399628.4231731644,344154.38525771734,399628.4231731644,134.89479277649667,0.4,0.1,0.0001,256,1024,50000
|
||||
ethereal-sweep-21,dqn,0.1,no_robust,398492.807245857,343580.6802427996,398492.807245857,136.67160732585188,0.1,0.2,0.001,512,2048,50000
|
||||
dark-sweep-418,linear,0.1,no_robust,394615.3720658343,339749.76272695075,394615.3720658343,134.39233246711,0.2,0.1,0.0003,256,1024,50000
|
||||
wandering-sweep-122,dqn,0.0,robust,394061.3617726404,339512.43434806296,394061.3617726404,137.6864755964331,0.1,0.3,0.0001,256,2048,30000
|
||||
laced-sweep-132,dqn,0.1,robust,389274.54998495104,335600.5979215904,389274.54998495104,137.36888574027677,0.4,0.2,0.001,256,2048,30000
|
||||
rich-sweep-53,qtable,0.0,robust,388601.2626147048,335630.6853337664,388601.2626147048,133.4414069888203,0.2,0.1,0.0001,512,1024,50000
|
||||
faithful-sweep-430,qtable,0.25,no_robust,387035.6970938766,333255.5771210341,387035.6970938766,137.4906091183188,0.1,0.2,0.0003,128,1024,15000
|
||||
dark-sweep-280,qtable,0.25,no_robust,386318.8845004527,332220.0316564078,386318.8845004527,137.26992450099925,0.4,0.1,0.0001,256,1024,50000
|
||||
chocolate-sweep-383,linear,0.25,no_robust,383989.49015403807,331071.7003244704,383989.49015403807,134.60590742050857,0.1,0.2,0.001,512,1024,30000
|
||||
dry-sweep-263,dqn,0.0,robust,383372.6880637367,330436.0312615148,383372.6880637367,137.40558130223476,0.1,0.3,0.001,128,1024,50000
|
||||
different-sweep-143,qtable,0.0,robust,383278.4198015018,330546.16800945485,383278.4198015018,135.9021538079678,0.1,0.3,0.001,256,2048,30000
|
||||
woven-sweep-139,dqn,0.25,robust,382788.1296637251,329427.735752473,382788.1296637251,136.8968339394894,0.1,0.1,0.001,512,1024,15000
|
||||
dark-sweep-215,dqn,0.25,robust,382358.2401374872,329330.0097603144,382358.2401374872,137.64528612332785,0.2,0.1,0.0001,512,4096,30000
|
||||
charmed-sweep-136,linear,0.25,no_robust,382249.5728044314,329646.2053260979,382249.5728044314,134.46825608007862,0.4,0.1,0.0001,256,2048,15000
|
||||
light-sweep-308,linear,0.0,robust,381939.1275250679,329628.9436641051,381939.1275250679,133.6209821974879,0.2,0.2,0.001,128,4096,30000
|
||||
treasured-sweep-325,linear,0.25,robust,381322.0104772589,328353.58675398555,381322.0104772589,134.8950293943581,0.1,0.1,0.0001,512,2048,15000
|
||||
fine-sweep-202,dqn,0.25,robust,378751.33572275366,326518.9068184018,378751.33572275366,137.2900973301052,0.1,0.2,0.0001,512,2048,30000
|
||||
treasured-sweep-380,linear,0.25,no_robust,377898.0979419424,325869.1953595453,377898.0979419424,134.54118723889738,0.4,0.3,0.001,128,1024,50000
|
||||
pretty-sweep-49,qtable,0.25,robust,377318.4766808995,325282.0152823859,377318.4766808995,137.19609012644068,0.4,0.1,0.0001,128,4096,50000
|
||||
desert-sweep-253,linear,0.25,robust,376808.6335063269,325146.3478714648,376808.6335063269,134.48396340732663,0.2,0.1,0.0003,256,1024,30000
|
||||
jolly-sweep-133,qtable,0.4,no_robust,376419.57394710975,323709.24588324485,376419.57394710975,137.8349363778071,0.1,0.3,0.0001,128,2048,50000
|
||||
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1,7 +0,0 @@
|
||||
alpha,runs_robust,runs_no_robust,eval_revenue_mean_robust,eval_revenue_mean_no_robust,eval_revenue_mean_delta,eval_revenue_mean_delta_pct,eval_reward_mean_robust,eval_reward_mean_no_robust,eval_reward_mean_delta,eval_reward_mean_delta_pct,eval_coi_level_mean_robust,eval_coi_level_mean_no_robust,eval_coi_level_mean_delta,eval_coi_level_mean_delta_pct,eval_coi_leakage_mean_robust,eval_coi_leakage_mean_no_robust,eval_coi_leakage_mean_delta,eval_coi_leakage_mean_delta_pct,eval_volatility_mean_robust,eval_volatility_mean_no_robust,eval_volatility_mean_delta,eval_volatility_mean_delta_pct,eval_margin_mean_robust,eval_margin_mean_no_robust,eval_margin_mean_delta,eval_margin_mean_delta_pct,train_alpha_adv_robust,train_alpha_adv_no_robust,train_alpha_adv_delta,train_alpha_adv_delta_pct,train_coi_penalty_robust,train_coi_penalty_no_robust,train_coi_penalty_delta,train_coi_penalty_delta_pct,train_ux_penalty_robust,train_ux_penalty_no_robust,train_ux_penalty_delta,train_ux_penalty_delta_pct,train_agent_prob_robust,train_agent_prob_no_robust,train_agent_prob_delta,train_agent_prob_delta_pct
|
||||
0.0,4.0,4.0,3379.9042994670963,3565.2912010160844,-185.38690154898813,-5.199768857482219,313527.4707462,331300.229069,-17772.758322799986,-5.364547550342456,137.08358925982625,137.28764358955686,-0.2040543297306101,-0.14863269875959326,0.1146626165658294,0.11861133504329742,-0.003948718477468013,-3.3291240470622716,0.06687153537785637,0.06445662162531288,0.0024149137525434905,3.746572022625408,0.9315273502623671,0.9317078361627993,-0.00018048590043218127,-0.019371512552207898,0.18958333333333333,,,,5.553200113221484,,,,61.35134238638615,66.58479574844135,-5.233453362055201,-7.859832418540847,0.12778212146468534,0.11615891320235115,0.011623208262334192,10.00629907933654
|
||||
0.1,4.0,4.0,3307.028238366196,3458.002436284769,-150.97419791857283,-4.365936713473732,306772.49146475,321215.477968,-14442.986503249966,-4.4963544704059375,137.1182041122497,136.82757579763506,0.29062831461465066,0.21240478238427865,0.1128546052304944,0.11704917861668755,-0.004194573386193154,-3.5835991638433753,0.0685405649303561,0.06737596899527175,0.0011645959350843477,1.728503430007924,0.9315331673960889,0.9313276818191593,0.00020548557692967595,0.0220637248243606,0.2818749999999999,0.1,0.18187499999999987,181.87499999999986,5.079528726095333,,,,52.44772950699336,53.288869747139515,-0.841140240146153,-1.578453895039319,0.11644381911386253,0.11765277436070229,-0.0012089552468397546,-1.0275620387270383
|
||||
0.25,4.0,4.0,3134.3438215278165,3300.5539051855053,-166.21008365768876,-5.035823938416998,290691.4771835,306522.90003785,-15831.422854350007,-5.16484179563586,136.89990884669214,136.71752459667877,0.18238425001337077,0.1334022471160229,0.11113957413522965,0.1139905600539111,-0.0028509859186814507,-2.50107194607439,0.06427159998376095,0.06846858821082077,-0.004196988227059828,-6.12980103246314,0.9314501501825461,0.9313053225630614,0.0001448276194846443,0.015551035302371268,0.44833333333333336,0.25,0.19833333333333336,79.33333333333334,4.7183804755060255,,,,49.04307009982127,55.2030005738411,-6.159930474019831,-11.158687770568074,0.10998505830218755,0.11684259343269415,-0.0068575351305066035,-5.869037077182653
|
||||
0.4,4.0,4.0,2983.852437569374,3180.7872854626567,-196.9348478932825,-6.191386918369099,276545.26309355,295433.5405797,-18888.277486150037,-6.393409986248494,136.19210761854086,136.5783021470118,-0.38619452847095204,-0.2827641890402586,0.10875560547061063,0.11189234314151972,-0.0031367376709090927,-2.8033532794480807,0.07452230347799255,0.07104688223410768,0.003475421243884863,4.891729425132195,0.9307282962514367,0.9310542820602117,-0.0003259858087749645,-0.03501254599824534,0.5999999999999999,0.4000000000000001,0.1999999999999998,49.999999999999936,4.174996403604185,,,,47.99794119802058,50.794260008988424,-2.796318810967847,-5.505186630286606,0.10222958892923095,0.11161526349272373,-0.009385674563492777,-8.408952565976458
|
||||
0.6,4.0,4.0,2789.0434220430398,2982.2460998252786,-193.20267778223888,-6.4784283830083,258688.11700405,277051.95613675,-18363.8391327,-6.628301560749781,136.86774320500828,136.81931587629953,0.04842732870875466,0.035395096371142916,0.10501047827147733,0.10802266412956946,-0.0030121858580921257,-2.788475809557069,0.06914180963767007,0.06698591531512615,0.0021558943225439137,3.2184292957732996,0.9314130089130337,0.9313849217310588,2.8087181974889575e-05,0.003015636319588161,0.7733333333333334,0.5999999999999999,0.17333333333333356,28.888888888888935,4.178300996512875,,,,39.928062615509425,47.86860429278531,-7.940541677275881,-16.588203885594947,0.11297979438696983,0.1162670925925253,-0.0032872982055554695,-2.827367686122743
|
||||
0.8,4.0,4.0,2586.098242115281,2841.1305915063504,-255.03234939106915,-8.97643882169642,239765.24959855,264140.55002745,-24375.300428900024,-9.228155399224729,136.5038826686135,137.28163778418497,-0.7777551155714661,-0.5665397995864124,0.10253056902792507,0.1031498585902154,-0.0006192895622903344,-0.6003784888844036,0.07325665736408164,0.06592454978099352,0.007332107583088124,11.1219683827132,0.9311235469993302,0.9316596013994161,-0.0005360544000858614,-0.05753758124541101,1.0,0.8000000000000002,0.19999999999999984,24.99999999999998,3.5384100686094007,,,,37.14414699970415,37.43809775029793,-0.29395075059377973,-0.7851647606519765,0.09990322635678014,0.10432800196112454,-0.0044247756043444,-4.241215705437541
|
||||
|
@@ -1,13 +0,0 @@
|
||||
alpha,mode,runs,eval_revenue_mean_mean,eval_revenue_mean_std,eval_reward_mean_mean,eval_reward_mean_std,eval_coi_level_mean_mean,eval_coi_level_mean_std,eval_coi_leakage_mean_mean,eval_coi_leakage_mean_std,eval_volatility_mean_mean,eval_volatility_mean_std,eval_margin_mean_mean,eval_margin_mean_std,train_alpha_adv_mean,train_alpha_adv_std,train_coi_penalty_mean,train_coi_penalty_std,train_ux_penalty_mean,train_ux_penalty_std,train_agent_prob_mean,train_agent_prob_std
|
||||
0.0,no_robust,4,3565.2912010160844,52.219179508209216,331300.229069,5038.96659004527,137.28764358955686,0.6434240315013728,0.11861133504329742,0.004019332768284657,0.06445662162531288,0.004080405219050139,0.9317078361627993,0.00038018051704976865,,,,,66.58479574844135,32.282270089830455,0.11615891320235115,0.016558627227281013
|
||||
0.0,robust,4,3379.9042994670963,54.727408939657735,313527.4707462,5408.058196552377,137.08358925982625,1.047386315387148,0.1146626165658294,0.0025627354157035497,0.06687153537785637,0.008577061675868377,0.9315273502623671,0.0007274203134899985,0.18958333333333333,0.02083333333333336,5.553200113221484,0.45981481828856186,61.35134238638615,30.27964905193963,0.12778212146468534,0.027929667978205217
|
||||
0.1,no_robust,4,3458.002436284769,60.75923217871363,321215.477968,6016.373193216596,136.82757579763506,1.1899102161551907,0.11704917861668755,0.0021220259908233973,0.06737596899527175,0.006801136773079149,0.9313276818191593,0.0008352263172197586,0.1,0.0,,,53.288869747139515,18.480340945815023,0.11765277436070229,0.017544197575138736
|
||||
0.1,robust,4,3307.028238366196,35.58495715224888,306772.49146475,3488.2690530060245,137.1182041122497,0.8582218376452346,0.1128546052304944,0.0005963155492967403,0.0685405649303561,0.0050673362512629015,0.9315331673960889,0.0005217376436765336,0.2818749999999999,0.03624999999999999,5.079528726095333,0.6109585102054891,52.44772950699336,29.0263361696475,0.11644381911386253,0.021152545180088765
|
||||
0.25,no_robust,4,3300.5539051855053,50.460978662647115,306522.90003785,4860.668937531515,136.71752459667877,0.7410676951244369,0.1139905600539111,0.003319948537321803,0.06846858821082077,0.008614994548315848,0.9313053225630614,0.0004919872662680591,0.25,0.0,,,55.2030005738411,26.88247558235345,0.11684259343269415,0.013462146346772591
|
||||
0.25,robust,4,3134.3438215278165,64.06834403659167,290691.4771835,6331.196493752059,136.89990884669214,1.3796663751798552,0.11113957413522965,0.0015044942041406348,0.06427159998376095,0.0042331619171274894,0.9314501501825461,0.0008939739741734515,0.44833333333333336,0.0033333333333333518,4.7183804755060255,0.4538389380858333,49.04307009982127,28.20484665432831,0.10998505830218755,0.010731404693185651
|
||||
0.4,no_robust,4,3180.7872854626567,71.87564776824694,295433.5405797,7035.374110540269,136.5783021470118,1.7095219574599192,0.11189234314151972,0.0013821115134030936,0.07104688223410768,0.005766138692685495,0.9310542820602117,0.0013989725050689828,0.4000000000000001,0.0,,,50.794260008988424,24.836708377642946,0.11161526349272373,0.005787749200301594
|
||||
0.4,robust,4,2983.852437569374,45.51290575912758,276545.26309355,4555.1725323898245,136.19210761854086,1.5546063667946701,0.10875560547061063,0.001118798290958954,0.07452230347799255,0.0040446395928049874,0.9307282962514367,0.0013558080014763189,0.5999999999999999,0.0,4.174996403604185,0.12189448324552496,47.99794119802058,33.51782503281748,0.10222958892923095,0.0031686467591609474
|
||||
0.6,no_robust,4,2982.2460998252786,39.93674476199945,277051.95613675,3931.02017169463,136.81931587629953,1.1995405806950865,0.10802266412956946,0.000405835985606262,0.06698591531512615,0.002805894772223563,0.9313849217310588,0.0008100530228792662,0.5999999999999999,0.0,,,47.86860429278531,23.830502772642472,0.1162670925925253,0.028676813474186293
|
||||
0.6,robust,4,2789.0434220430398,35.297482315631626,258688.11700405,3420.6735023624556,136.86774320500828,0.7097303238857778,0.10501047827147733,0.0008273121554488608,0.06914180963767007,0.009066158371268139,0.9314130089130337,0.0005024421703994162,0.7733333333333334,0.053333333333333385,4.178300996512875,0.5865970573865015,39.928062615509425,30.25078643153115,0.11297979438696983,0.0274101056520461
|
||||
0.8,no_robust,4,2841.1305915063504,21.84043179776092,264140.55002745,2073.353315114627,137.28163778418497,0.6288968799501957,0.1031498585902154,0.0012877581835795701,0.06592454978099352,0.00340700896766341,0.9316596013994161,0.00038430108058413553,0.8000000000000002,0.0,,,37.43809775029793,32.01740090550489,0.10432800196112454,0.018337841526911584
|
||||
0.8,robust,4,2586.098242115281,48.05539265296157,239765.24959855,4681.6472175597555,136.5038826686135,1.0611320896043694,0.10253056902792507,0.002587472569909977,0.07325665736408164,0.0015359324114246234,0.9311235469993302,0.0006145440308596868,1.0,0.0,3.5384100686094007,0.391972726035734,37.14414699970415,25.614063825315505,0.09990322635678014,0.010269342031085898
|
||||
|
@@ -1,7 +0,0 @@
|
||||
{
|
||||
"status": "ok",
|
||||
"revenue_delta": -191.29017636530716,
|
||||
"revenue_delta_pct": -5.938226273545598,
|
||||
"coi_leakage_delta": -0.002960415145605702,
|
||||
"coi_leakage_delta_pct": -2.6404147469510946
|
||||
}
|
||||
@@ -1,3 +0,0 @@
|
||||
mode,runs,eval_revenue_mean_mean,eval_revenue_mean_std,eval_reward_mean_mean,eval_reward_mean_std,eval_coi_level_mean_mean,eval_coi_level_mean_std,eval_coi_leakage_mean_mean,eval_coi_leakage_mean_std,eval_volatility_mean_mean,eval_volatility_mean_std,eval_margin_mean_mean,eval_margin_mean_std,train_alpha_adv_mean,train_alpha_adv_std,train_coi_penalty_mean,train_coi_penalty_std,train_ux_penalty_mean,train_ux_penalty_std,train_agent_prob_mean,train_agent_prob_std
|
||||
no_robust,24,3221.335253213441,262.46595166337727,299277.442303125,24382.561944761477,136.9186666318945,1.0038463876967063,0.11211932326253345,0.005805494533542669,0.06737642102693879,0.005402738047823369,0.9314066076226178,0.0007436370959663933,0.43,0.2546411303445653,,,51.86293802024894,25.340287421525442,0.11381077317368686,0.016664235359362907
|
||||
robust,24,3030.0450768481337,288.262657026656,280998.34484843333,26820.020161880373,136.77757261848845,1.06224696086916,0.10915890811692774,0.004616462637659704,0.06943407846195294,0.006435789449278624,0.9312959200008004,0.0007858424519830652,0.5488541666666666,0.2860373751485706,4.540469463924883,0.7906156355346259,47.985382134405825,27.407657819442747,0.11155393475895271,0.01943348418653492
|
||||
|
@@ -1,25 +0,0 @@
|
||||
alpha,metric,direction,wins,ties,total_pairs,win_probability
|
||||
0.0,eval/revenue_mean,higher,0,0,16,0.0
|
||||
0.0,eval/reward_mean,higher,0,0,16,0.0
|
||||
0.0,eval/coi_leakage_mean,lower,14,0,16,0.875
|
||||
0.0,eval/volatility_mean,lower,8,0,16,0.5
|
||||
0.1,eval/revenue_mean,higher,0,0,16,0.0
|
||||
0.1,eval/reward_mean,higher,0,0,16,0.0
|
||||
0.1,eval/coi_leakage_mean,lower,16,0,16,1.0
|
||||
0.1,eval/volatility_mean,lower,8,0,16,0.5
|
||||
0.25,eval/revenue_mean,higher,0,0,16,0.0
|
||||
0.25,eval/reward_mean,higher,0,0,16,0.0
|
||||
0.25,eval/coi_leakage_mean,lower,12,0,16,0.75
|
||||
0.25,eval/volatility_mean,lower,11,0,16,0.6875
|
||||
0.4,eval/revenue_mean,higher,0,0,16,0.0
|
||||
0.4,eval/reward_mean,higher,0,0,16,0.0
|
||||
0.4,eval/coi_leakage_mean,lower,16,0,16,1.0
|
||||
0.4,eval/volatility_mean,lower,6,0,16,0.375
|
||||
0.6,eval/revenue_mean,higher,0,0,16,0.0
|
||||
0.6,eval/reward_mean,higher,0,0,16,0.0
|
||||
0.6,eval/coi_leakage_mean,lower,16,0,16,1.0
|
||||
0.6,eval/volatility_mean,lower,7,0,16,0.4375
|
||||
0.8,eval/revenue_mean,higher,0,0,16,0.0
|
||||
0.8,eval/reward_mean,higher,0,0,16,0.0
|
||||
0.8,eval/coi_leakage_mean,lower,11,0,16,0.6875
|
||||
0.8,eval/volatility_mean,lower,0,0,16,0.0
|
||||
|
@@ -1 +0,0 @@
|
||||
\includegraphics[width=0.99\linewidth]{chapters/figures/results/generated/legacy/plots/first_sweep_tier_revenue.pdf}
|
||||
@@ -1 +0,0 @@
|
||||
\includegraphics[width=0.98\linewidth]{chapters/figures/results/generated/legacy/plots/ppo_alpha_curves.pdf}
|
||||
@@ -1 +0,0 @@
|
||||
\includegraphics[width=0.98\linewidth]{chapters/figures/results/generated/legacy/plots/ppo_delta_curves.pdf}
|
||||
@@ -1 +0,0 @@
|
||||
\includegraphics[width=0.88\linewidth]{chapters/figures/results/generated/legacy/plots/ppo_tradeoff_scatter.pdf}
|
||||
@@ -1,313 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
import matplotlib
|
||||
|
||||
matplotlib.use("Agg")
|
||||
import matplotlib.pyplot as plt
|
||||
from matplotlib.ticker import FuncFormatter
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from process_first_sweep import run as run_first_sweep
|
||||
from process_ppo_benchmark import run as run_ppo_benchmark
|
||||
|
||||
|
||||
def _output_dir() -> Path:
|
||||
return Path(__file__).resolve().parent / "generated" / "legacy"
|
||||
|
||||
|
||||
def _plot_dir() -> Path:
    """Return the ``plots`` subdirectory of the directory given by ``_output_dir()``."""
    legacy_root = _output_dir()
    return legacy_root / "plots"
|
||||
|
||||
|
||||
def _configure_style() -> None:
    """Apply the shared matplotlib rcParams (fonts, DPI, spines, grid) in one update."""
    typography = {
        "font.family": "serif",
        "font.size": 10,
        "axes.titlesize": 10,
        "axes.labelsize": 9,
        "legend.fontsize": 8,
        "xtick.labelsize": 8,
        "ytick.labelsize": 8,
    }
    resolution = {
        "figure.dpi": 220,
        "savefig.dpi": 320,
    }
    axes_look = {
        # Hide the top/right spines and keep a faint grid behind the data.
        "axes.spines.top": False,
        "axes.spines.right": False,
        "axes.grid": True,
        "grid.alpha": 0.22,
    }
    plt.rcParams.update({**typography, **resolution, **axes_look})
|
||||
|
||||
|
||||
def _fmt_thousands(value: float, _: int) -> str:
|
||||
return f"{int(value):,}"
|
||||
|
||||
|
||||
def _load_csv(path: Path) -> pd.DataFrame:
|
||||
if not path.exists():
|
||||
raise FileNotFoundError(f"Missing required input: {path}")
|
||||
return pd.read_csv(path)
|
||||
|
||||
|
||||
def _plot_ppo_alpha_curves(alpha_mode: pd.DataFrame, out_dir: Path) -> Path:
    """Draw a 2x2 grid of per-alpha metric curves for both modes and save it as a PDF.

    Each panel plots ``<metric>_mean`` against ``alpha`` for the ``no_robust``
    and ``robust`` rows of ``alpha_mode``; when a ``<metric>_std`` column is
    present, a +/- one-std band is shaded around the line (missing std values
    are treated as 0).

    Args:
        alpha_mode: summary frame with ``alpha``, ``mode`` and the metric columns.
        out_dir: directory that receives ``ppo_alpha_curves.pdf``.

    Returns:
        Path of the written PDF.
    """
    fig, axes = plt.subplots(2, 2, figsize=(9.3, 6.4), constrained_layout=True)

    # (mode key, legend label, line colour), in drawing order.
    series_styles = (
        ("no_robust", "Non-robust", "#4C72B0"),
        ("robust", "Robust", "#C44E52"),
    )
    # (metric column prefix, panel title, y-axis label)
    panel_specs = (
        ("eval_revenue_mean", "Mean Episode Revenue", "Revenue"),
        ("eval_reward_mean", "Mean Episode Reward", "Reward"),
        ("eval_coi_leakage_mean", "Mean COI Leakage", "COI Leakage"),
        ("eval_volatility_mean", "Mean Price Volatility", "Volatility"),
    )
    # Panels whose y ticks are rendered with thousands separators.
    comma_formatted = {"eval_revenue_mean", "eval_reward_mean"}

    for panel, (prefix, heading, axis_label) in zip(axes.flat, panel_specs):
        center_col = f"{prefix}_mean"
        spread_col = f"{prefix}_std"

        for mode_key, legend_label, colour in series_styles:
            rows = alpha_mode[alpha_mode["mode"] == mode_key].sort_values("alpha")
            if rows.empty:
                continue

            alphas = rows["alpha"].to_numpy(dtype=float)
            centers = rows[center_col].to_numpy(dtype=float)
            panel.plot(
                alphas,
                centers,
                marker="o",
                linewidth=1.8,
                markersize=4,
                color=colour,
                label=legend_label,
            )

            if spread_col not in rows.columns:
                continue
            spread = rows[spread_col].fillna(0.0).to_numpy(dtype=float)
            panel.fill_between(
                alphas,
                centers - spread,
                centers + spread,
                color=colour,
                alpha=0.14,
                linewidth=0,
            )

        panel.set_title(heading)
        panel.set_xlabel(r"Contamination $\alpha$")
        panel.set_ylabel(axis_label)
        panel.set_xticks(sorted(alpha_mode["alpha"].unique()))
        if prefix in comma_formatted:
            panel.yaxis.set_major_formatter(FuncFormatter(_fmt_thousands))

    # One figure-level legend built from the first panel's handles.
    legend_handles, legend_labels = axes.flat[0].get_legend_handles_labels()
    fig.legend(
        legend_handles,
        legend_labels,
        ncol=2,
        loc="upper center",
        bbox_to_anchor=(0.5, 1.02),
    )

    destination = out_dir / "ppo_alpha_curves.pdf"
    fig.savefig(destination, bbox_inches="tight")
    plt.close(fig)
    return destination
|
||||
|
||||
|
||||
def _plot_ppo_delta_curves(deltas: pd.DataFrame, out_dir: Path) -> Path:
    """Plot percentage deltas against alpha on two stacked panels and save as PDF.

    The top panel shows the revenue and reward ``*_delta_pct`` columns, the
    bottom panel the COI-leakage and volatility ones. Both panels get a dashed
    zero line; only the top panel is titled.

    Args:
        deltas: frame with ``alpha`` and the four ``*_delta_pct`` columns.
        out_dir: directory that receives ``ppo_delta_curves.pdf``.

    Returns:
        Path of the written PDF.
    """
    fig, axes = plt.subplots(2, 1, figsize=(8.6, 6.0), constrained_layout=True)
    ordered = deltas.sort_values("alpha")
    alphas = ordered["alpha"].to_numpy(dtype=float)

    # (panel title or None, ((column, legend label, colour), ...)) top to bottom.
    panel_specs = (
        (
            "Robust Minus Non-robust Delta by Contamination",
            (
                ("eval_revenue_mean_delta_pct", "Revenue", "#4C72B0"),
                ("eval_reward_mean_delta_pct", "Reward", "#8172B3"),
            ),
        ),
        (
            None,
            (
                ("eval_coi_leakage_mean_delta_pct", "COI Leakage", "#55A868"),
                ("eval_volatility_mean_delta_pct", "Volatility", "#DD8452"),
            ),
        ),
    )

    for axis, (heading, curves) in zip(axes, panel_specs):
        for column, legend_label, colour in curves:
            axis.plot(
                alphas,
                ordered[column].to_numpy(dtype=float),
                marker="o",
                linewidth=1.8,
                markersize=4,
                color=colour,
                label=legend_label,
            )
        axis.axhline(0.0, color="#444444", linewidth=1.0, linestyle="--")
        if heading is not None:
            axis.set_title(heading)
        axis.set_ylabel("Delta (%)")
        axis.set_xlabel(r"Contamination $\alpha$")
        axis.set_xticks(alphas)
        axis.legend(loc="lower left")

    destination = out_dir / "ppo_delta_curves.pdf"
    fig.savefig(destination, bbox_inches="tight")
    plt.close(fig)
    return destination
|
||||
|
||||
|
||||
def _plot_ppo_tradeoff_scatter(deltas: pd.DataFrame, out_dir: Path) -> Path:
    """Scatter revenue delta (%) against COI-leakage delta (%), one point per alpha.

    Points are coloured by ``alpha`` (viridis) and annotated with their alpha
    value; dashed lines mark zero on both axes.

    Args:
        deltas: frame with ``alpha``, ``eval_coi_leakage_mean_delta_pct`` and
            ``eval_revenue_mean_delta_pct`` columns.
        out_dir: directory that receives ``ppo_tradeoff_scatter.pdf``.

    Returns:
        Path of the written PDF.
    """
    fig, ax = plt.subplots(figsize=(6.4, 5.2), constrained_layout=True)

    ordered = deltas.sort_values("alpha")
    leakage_pct = ordered["eval_coi_leakage_mean_delta_pct"].to_numpy(dtype=float)
    revenue_pct = ordered["eval_revenue_mean_delta_pct"].to_numpy(dtype=float)
    alpha_levels = ordered["alpha"].to_numpy(dtype=float)

    points = ax.scatter(
        leakage_pct,
        revenue_pct,
        c=alpha_levels,
        cmap="viridis",
        s=72,
        edgecolor="#222222",
        linewidth=0.5,
    )
    for leak, revenue, level in zip(leakage_pct, revenue_pct, alpha_levels):
        ax.annotate(
            rf"$\alpha={level:.2f}$",
            (leak, revenue),
            textcoords="offset points",
            xytext=(5, 4),
            fontsize=8,
        )

    # Dashed zero lines: horizontal first, then vertical.
    for draw_reference in (ax.axhline, ax.axvline):
        draw_reference(0.0, color="#555555", linewidth=1.0, linestyle="--")

    ax.set_xlabel("COI Leakage Delta (%)")
    ax.set_ylabel("Revenue Delta (%)")
    ax.set_title("PPO Robust Tradeoff Frontier")
    colorbar = fig.colorbar(points, ax=ax)
    colorbar.set_label(r"Contamination $\alpha$")

    destination = out_dir / "ppo_tradeoff_scatter.pdf"
    fig.savefig(destination, bbox_inches="tight")
    plt.close(fig)
    return destination
|
||||
|
||||
|
||||
def _plot_first_sweep_tier_revenue(tier_mode: pd.DataFrame, out_dir: Path) -> Path:
|
||||
pivot = (
|
||||
tier_mode.pivot(index="tier", columns="mode", values="eval_revenue_mean_mean")
|
||||
.dropna(subset=["robust", "no_robust"], how="any")
|
||||
.copy()
|
||||
)
|
||||
if pivot.empty:
|
||||
raise ValueError("First sweep tier summary missing robust/non-robust pairs")
|
||||
|
||||
order = sorted(pivot.index.tolist())
|
||||
pivot = pivot.loc[order]
|
||||
delta_pct = 100.0 * (pivot["robust"] - pivot["no_robust"]) / pivot["no_robust"]
|
||||
|
||||
fig, axes = plt.subplots(1, 2, figsize=(10.2, 4.3), constrained_layout=True)
|
||||
x = np.arange(len(order))
|
||||
width = 0.36
|
||||
|
||||
axes[0].bar(
|
||||
x - width / 2,
|
||||
pivot["no_robust"].to_numpy(dtype=float),
|
||||
width=width,
|
||||
label="Non-robust",
|
||||
color="#4C72B0",
|
||||
)
|
||||
axes[0].bar(
|
||||
x + width / 2,
|
||||
pivot["robust"].to_numpy(dtype=float),
|
||||
width=width,
|
||||
label="Robust",
|
||||
color="#C44E52",
|
||||
)
|
||||
axes[0].set_xticks(x)
|
||||
axes[0].set_xticklabels(order, rotation=20)
|
||||
axes[0].set_ylabel("Mean Revenue")
|
||||
axes[0].set_yscale("log")
|
||||
axes[0].yaxis.set_major_formatter(FuncFormatter(_fmt_thousands))
|
||||
axes[0].set_title("First Sweep Tier Revenue (log scale)")
|
||||
axes[0].legend()
|
||||
|
||||
axes[1].bar(x, delta_pct.to_numpy(dtype=float), color="#55A868", width=0.55)
|
||||
axes[1].axhline(0.0, color="#444444", linewidth=1.0, linestyle="--")
|
||||
axes[1].set_xticks(x)
|
||||
axes[1].set_xticklabels(order, rotation=20)
|
||||
axes[1].set_ylabel("Revenue Delta (%)")
|
||||
axes[1].set_title("Robust Minus Non-robust by Tier")
|
||||
|
||||
out_path = out_dir / "first_sweep_tier_revenue.pdf"
|
||||
fig.savefig(out_path, bbox_inches="tight")
|
||||
plt.close(fig)
|
||||
return out_path
|
||||
|
||||
|
||||
def build_plots(data_dir: Path, out_dir: Path) -> list[Path]:
    """Load the three summary CSVs from ``data_dir`` and render all four PDFs.

    All inputs are loaded before ``out_dir`` is created, so a missing CSV
    fails before anything is written.

    Args:
        data_dir: directory holding ``ppo_alpha_mode_summary.csv``,
            ``ppo_alpha_deltas.csv`` and ``first_sweep_tier_mode_summary.csv``.
        out_dir: directory for the PDFs; created (with parents) if needed.

    Returns:
        Paths of the written plots, in rendering order.
    """
    input_names = (
        "ppo_alpha_mode_summary.csv",
        "ppo_alpha_deltas.csv",
        "first_sweep_tier_mode_summary.csv",
    )
    alpha_mode, deltas, tier_mode = (
        _load_csv(data_dir / file_name) for file_name in input_names
    )

    out_dir.mkdir(parents=True, exist_ok=True)

    # (plot function, input frame), executed in this order.
    jobs = (
        (_plot_ppo_alpha_curves, alpha_mode),
        (_plot_ppo_delta_curves, deltas),
        (_plot_ppo_tradeoff_scatter, deltas),
        (_plot_first_sweep_tier_revenue, tier_mode),
    )
    return [render(table, out_dir) for render, table in jobs]
|
||||
|
||||
|
||||
def main() -> None:
    """CLI entry point: optionally regenerate the processed CSVs, then render the plots.

    Flags:
        --data-dir: directory with the processed CSVs (default ``_output_dir()``).
        --plot-dir: directory for the PDFs (default ``_plot_dir()``).
        --refresh-data: re-run both processing steps into ``--data-dir`` first.

    Prints the path of every plot written, one per line.
    """
    cli = argparse.ArgumentParser(
        description="Create paper-ready plots from result CSVs"
    )
    cli.add_argument("--data-dir", type=Path, default=_output_dir())
    cli.add_argument("--plot-dir", type=Path, default=_plot_dir())
    cli.add_argument(
        "--refresh-data",
        action="store_true",
        help="Regenerate processed CSVs before plotting",
    )
    options = cli.parse_args()

    _configure_style()

    if options.refresh_data:
        # Raw exports live under <sixth ancestor of this file>/tpu_orchestration/results.
        raw_results = (
            Path(__file__).resolve().parents[5] / "tpu_orchestration" / "results"
        )
        run_ppo_benchmark(
            input_path=raw_results / "ppo_benchmark.csv",
            output_dir=options.data_dir,
            include_non_finished=False,
        )
        run_first_sweep(
            input_path=raw_results / "first_sweep.csv",
            output_dir=options.data_dir,
            include_non_finished=False,
            top_n=25,
        )

    for plot_path in build_plots(data_dir=options.data_dir, out_dir=options.plot_dir):
        print(plot_path)


# Run the CLI only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
||||
@@ -1,51 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
from process_first_sweep import run as run_first_sweep
|
||||
from process_ppo_benchmark import run as run_ppo_benchmark
|
||||
|
||||
|
||||
def _default_output_dir() -> Path:
|
||||
return Path(__file__).resolve().parent / "generated" / "legacy"
|
||||
|
||||
|
||||
def main() -> None:
    """CLI entry point: run both processing steps and print every path they return.

    Flags:
        --output-dir: destination directory (default ``_default_output_dir()``).
        --include-non-finished: forwarded to both processing steps.
        --top-n: forwarded to the first-sweep step (default 25).
    """
    cli = argparse.ArgumentParser(
        description="Process all result CSV exports for paper figures"
    )
    cli.add_argument("--output-dir", type=Path, default=_default_output_dir())
    cli.add_argument("--include-non-finished", action="store_true")
    cli.add_argument("--top-n", type=int, default=25)
    options = cli.parse_args()

    keep_unfinished = bool(options.include_non_finished)

    written: list[Path] = []

    # PPO benchmark export first ...
    ppo_source = (
        Path(__file__).resolve().parents[5]
        / "tpu_orchestration"
        / "results"
        / "ppo_benchmark.csv"
    )
    written.extend(
        run_ppo_benchmark(
            input_path=ppo_source,
            output_dir=options.output_dir,
            include_non_finished=keep_unfinished,
        )
    )

    # ... then the first-sweep export.
    sweep_source = (
        Path(__file__).resolve().parents[5]
        / "tpu_orchestration"
        / "results"
        / "first_sweep.csv"
    )
    written.extend(
        run_first_sweep(
            input_path=sweep_source,
            output_dir=options.output_dir,
            include_non_finished=keep_unfinished,
            top_n=int(options.top_n),
        )
    )

    for produced in written:
        print(produced)


# Run the CLI only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
||||
@@ -639,7 +639,7 @@ def run(
|
||||
)
|
||||
)
|
||||
|
||||
include_dir = Path(__file__).resolve().parent / "includes" / "final"
|
||||
include_dir = Path(__file__).resolve().parent / "includes"
|
||||
written.append(
|
||||
_write_include(
|
||||
include_dir / "final_focus_revenue_by_alpha.tex",
|
||||
|
||||
@@ -1,272 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Iterable
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
|
||||
def _project_root() -> Path:
|
||||
return Path(__file__).resolve().parents[5]
|
||||
|
||||
|
||||
def _default_input() -> Path:
    """Return ``<project root>/tpu_orchestration/results/first_sweep.csv``."""
    results_dir = _project_root() / "tpu_orchestration" / "results"
    return results_dir / "first_sweep.csv"
|
||||
|
||||
|
||||
def _default_output_dir() -> Path:
|
||||
return Path(__file__).resolve().parent / "generated" / "legacy"
|
||||
|
||||
|
||||
def _sanitize(key: str) -> str:
|
||||
return key.replace("/", "_").replace("-", "_")
|
||||
|
||||
|
||||
def _coerce_numeric(frame: pd.DataFrame, columns: Iterable[str]) -> None:
|
||||
for column in columns:
|
||||
if column in frame.columns:
|
||||
frame[column] = pd.to_numeric(frame[column], errors="coerce")
|
||||
|
||||
|
||||
def _extract_alpha(frame: pd.DataFrame) -> pd.Series:
|
||||
if "study/alpha" in frame.columns:
|
||||
return pd.to_numeric(frame["study/alpha"], errors="coerce")
|
||||
if "alpha" in frame.columns:
|
||||
return pd.to_numeric(frame["alpha"], errors="coerce")
|
||||
return pd.Series(np.nan, index=frame.index, dtype=float)
|
||||
|
||||
|
||||
def _extract_mode(frame: pd.DataFrame) -> pd.Series:
|
||||
if "study/mode" in frame.columns:
|
||||
return frame["study/mode"].astype(str).str.strip().str.lower()
|
||||
if "study/no_robust" in frame.columns:
|
||||
no_robust = pd.to_numeric(frame["study/no_robust"], errors="coerce").fillna(0.0)
|
||||
return pd.Series(
|
||||
np.where(no_robust > 0.5, "no_robust", "robust"),
|
||||
index=frame.index,
|
||||
dtype="object",
|
||||
)
|
||||
if "no_robust" in frame.columns:
|
||||
no_robust = (
|
||||
frame["no_robust"].astype(str).str.lower().isin({"1", "true", "yes"})
|
||||
)
|
||||
return pd.Series(
|
||||
np.where(no_robust, "no_robust", "robust"),
|
||||
index=frame.index,
|
||||
dtype="object",
|
||||
)
|
||||
return pd.Series("", index=frame.index, dtype="object")
|
||||
|
||||
|
||||
def _extract_tier(frame: pd.DataFrame) -> pd.Series:
|
||||
for column in ("tiers", "runtime/backend", "algo", "run.backend", "run.algo"):
|
||||
if column in frame.columns:
|
||||
tier = frame[column].astype(str).str.strip().str.lower()
|
||||
if tier.notna().any():
|
||||
return tier
|
||||
return pd.Series("unknown", index=frame.index, dtype="object")
|
||||
|
||||
|
||||
def _prepare_frame(frame: pd.DataFrame, include_non_finished: bool) -> pd.DataFrame:
    """Filter and normalise the raw sweep export for aggregation.

    Steps:
      1. Unless ``include_non_finished`` is set, keep only rows whose ``State``
         column (when present) equals ``"finished"`` case-insensitively.
      2. Add derived ``alpha``, ``mode`` and ``tier`` columns.
      3. Keep only rows whose mode is ``robust`` or ``no_robust`` and whose
         alpha is not NaN.
      4. Coerce the known metric and hyper-parameter columns to numeric.

    Returns a new frame sorted by ``tier``, ``alpha``, ``mode`` with a fresh
    index; the input frame is not modified.
    """
    data = frame.copy()
    if not include_non_finished and "State" in data.columns:
        data = data[data["State"].astype(str).str.lower() == "finished"].copy()

    data["alpha"] = _extract_alpha(data)
    data["mode"] = _extract_mode(data)
    data["tier"] = _extract_tier(data)

    usable = data["mode"].isin({"robust", "no_robust"}) & data["alpha"].notna()
    # Take an explicit copy: the coercion below assigns columns, and doing that
    # on a filtered slice is chained assignment (SettingWithCopyWarning).
    data = data[usable].copy()

    _coerce_numeric(
        data,
        [
            "eval/revenue_mean",
            "eval/reward_mean",
            "eval/coi_level_mean",
            "eval/coi_leakage_mean",
            "eval/margin_mean",
            "eval/volatility_mean",
            "objective/score",
            "train/alpha_adv",
            "lambda_coi",
            "robust_radius",
            "learning_rate",
            "batch_size",
            "n_steps",
            "total_timesteps",
        ],
    )
    return data.sort_values(["tier", "alpha", "mode"]).reset_index(drop=True)
|
||||
|
||||
|
||||
def _group_summary(
    frame: pd.DataFrame, by: list[str], metrics: list[str]
) -> pd.DataFrame:
    """Aggregate ``frame`` per group of ``by`` columns.

    The result has one row per group with a ``runs`` count plus, for every
    metric, ``<sanitized>_mean`` and ``<sanitized>_std`` columns. Rows are
    sorted by the ``by`` columns.
    """
    named_aggs: dict[str, tuple[str, str]] = {"runs": ("mode", "size")}
    for metric in metrics:
        stem = _sanitize(metric)
        named_aggs.update(
            {f"{stem}_mean": (metric, "mean"), f"{stem}_std": (metric, "std")}
        )
    grouped = frame.groupby(by, as_index=False)
    summary = grouped.agg(**named_aggs)
    return summary.sort_values(by)
|
||||
|
||||
|
||||
def _tier_alpha_deltas(summary: pd.DataFrame, metrics: list[str]) -> pd.DataFrame:
    """Compute robust-minus-no_robust differences per (tier, alpha) cell.

    ``summary`` is expected to carry ``tier``, ``alpha``, ``mode``, ``runs``
    and ``<sanitized>_mean`` columns. Cells lacking either mode are skipped.
    For each metric the absolute delta and the delta as a percentage of the
    no_robust mean are emitted; the percentage is NaN when that mean is zero.
    """
    records: list[dict[str, float | str]] = []
    for (tier, alpha), cell in summary.groupby(["tier", "alpha"], sort=True):
        treated = cell[cell["mode"] == "robust"]
        baseline = cell[cell["mode"] == "no_robust"]
        if treated.empty or baseline.empty:
            continue

        record: dict[str, float | str] = {
            "tier": str(tier),
            "alpha": float(alpha),
            "runs_robust": float(treated["runs"].iloc[0]),
            "runs_no_robust": float(baseline["runs"].iloc[0]),
        }
        for metric in metrics:
            stem = _sanitize(metric)
            mean_column = f"{stem}_mean"
            treated_mean = float(treated[mean_column].iloc[0])
            baseline_mean = float(baseline[mean_column].iloc[0])
            gap = treated_mean - baseline_mean
            record[f"{stem}_delta"] = gap
            if baseline_mean == 0:
                record[f"{stem}_delta_pct"] = np.nan
            else:
                record[f"{stem}_delta_pct"] = 100.0 * gap / baseline_mean
        records.append(record)

    return pd.DataFrame(records)
|
||||
|
||||
|
||||
def _top_runs(frame: pd.DataFrame, n: int) -> pd.DataFrame:
|
||||
rank_metric = "objective/score"
|
||||
if rank_metric not in frame.columns or frame[rank_metric].notna().sum() == 0:
|
||||
rank_metric = "eval/reward_mean"
|
||||
|
||||
keep = [
|
||||
"Name",
|
||||
"tier",
|
||||
"alpha",
|
||||
"mode",
|
||||
rank_metric,
|
||||
"eval/revenue_mean",
|
||||
"eval/reward_mean",
|
||||
"eval/coi_level_mean",
|
||||
"eval/coi_leakage_mean",
|
||||
"lambda_coi",
|
||||
"robust_radius",
|
||||
"learning_rate",
|
||||
"batch_size",
|
||||
"n_steps",
|
||||
"total_timesteps",
|
||||
]
|
||||
present = [column for column in keep if column in frame.columns]
|
||||
ranked = frame[present].copy().sort_values(rank_metric, ascending=False)
|
||||
return ranked.head(max(1, int(n))).reset_index(drop=True)
|
||||
|
||||
|
||||
def _headline_json(
|
||||
frame: pd.DataFrame, tier_mode: pd.DataFrame
|
||||
) -> dict[str, float | str]:
|
||||
out: dict[str, float | str] = {
|
||||
"runs": int(len(frame)),
|
||||
"tiers": int(frame["tier"].nunique()),
|
||||
"alphas": int(frame["alpha"].nunique()),
|
||||
}
|
||||
|
||||
robust_rows = tier_mode[tier_mode["mode"] == "robust"]
|
||||
no_robust_rows = tier_mode[tier_mode["mode"] == "no_robust"]
|
||||
if robust_rows.empty or no_robust_rows.empty:
|
||||
out["status"] = "incomplete_modes"
|
||||
return out
|
||||
|
||||
robust_mean = robust_rows["eval_revenue_mean_mean"].mean()
|
||||
no_robust_mean = no_robust_rows["eval_revenue_mean_mean"].mean()
|
||||
out.update(
|
||||
{
|
||||
"status": "ok",
|
||||
"mean_tier_revenue_robust": float(robust_mean),
|
||||
"mean_tier_revenue_no_robust": float(no_robust_mean),
|
||||
"mean_tier_revenue_delta": float(robust_mean - no_robust_mean),
|
||||
"mean_tier_revenue_delta_pct": float(
|
||||
100.0 * (robust_mean - no_robust_mean) / no_robust_mean
|
||||
)
|
||||
if no_robust_mean
|
||||
else np.nan,
|
||||
}
|
||||
)
|
||||
return out
|
||||
|
||||
|
||||
def run(
    input_path: Path, output_dir: Path, include_non_finished: bool, top_n: int
) -> list[Path]:
    """Process the sweep CSV and write the summary tables and headline JSON.

    Creates ``output_dir`` if needed, reads ``input_path``, derives four
    tables (tier/mode summary, tier/alpha/mode summary, tier/alpha deltas,
    top configurations) and a headline dictionary, then writes them as four
    CSV files and one JSON file. All results are computed before anything is
    written.

    Returns the written paths: the CSVs in the order above, then the JSON.
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    frame = _prepare_frame(
        pd.read_csv(input_path), include_non_finished=include_non_finished
    )

    candidate_metrics = (
        "eval/revenue_mean",
        "eval/reward_mean",
        "eval/coi_level_mean",
        "eval/coi_leakage_mean",
        "eval/margin_mean",
        "eval/volatility_mean",
        "objective/score",
        "train/alpha_adv",
    )
    # Only aggregate metrics that the input actually contains.
    metrics = [name for name in candidate_metrics if name in frame.columns]

    tier_mode = _group_summary(frame, ["tier", "mode"], metrics)
    tier_alpha_mode = _group_summary(frame, ["tier", "alpha", "mode"], metrics)
    deltas = _tier_alpha_deltas(tier_alpha_mode, metrics)
    top_configs = _top_runs(frame, n=top_n)
    headline = _headline_json(frame, tier_mode)

    tables = [
        ("first_sweep_tier_mode_summary.csv", tier_mode),
        ("first_sweep_tier_alpha_mode_summary.csv", tier_alpha_mode),
        ("first_sweep_tier_alpha_deltas.csv", deltas),
        ("first_sweep_top_configs.csv", top_configs),
    ]
    written_paths: list[Path] = []
    for filename, table in tables:
        destination = output_dir / filename
        table.to_csv(destination, index=False)
        written_paths.append(destination)

    headline_path = output_dir / "first_sweep_headline_summary.json"
    headline_path.write_text(json.dumps(headline, indent=2))
    written_paths.append(headline_path)
    return written_paths
|
||||
|
||||
|
||||
def main() -> None:
    """Command-line entry point: parse options, run the processing, print outputs.

    Options: ``--input`` and ``--output-dir`` (paths with module-level
    defaults), ``--include-non-finished`` (flag) and ``--top-n`` (integer,
    default 25). Each written path is printed on its own line.
    """
    cli = argparse.ArgumentParser(
        description="Process first sweep CSV for paper tables"
    )
    cli.add_argument("--input", type=Path, default=_default_input())
    cli.add_argument("--output-dir", type=Path, default=_default_output_dir())
    cli.add_argument("--include-non-finished", action="store_true")
    cli.add_argument("--top-n", type=int, default=25)
    options = cli.parse_args()

    written = run(
        input_path=options.input,
        output_dir=options.output_dir,
        include_non_finished=bool(options.include_non_finished),
        top_n=int(options.top_n),
    )
    for written_path in written:
        print(written_path)
|
||||
|
||||
|
||||
# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
@@ -1,277 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Iterable
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
|
||||
def _project_root() -> Path:
|
||||
return Path(__file__).resolve().parents[5]
|
||||
|
||||
|
||||
def _default_input() -> Path:
    """Return the default input CSV path.

    The path is ``tpu_orchestration/results/ppo_benchmark.csv`` underneath the
    directory returned by ``_project_root()``.
    """
    results_dir = _project_root() / "tpu_orchestration" / "results"
    return results_dir / "ppo_benchmark.csv"
|
||||
|
||||
|
||||
def _default_output_dir() -> Path:
|
||||
return Path(__file__).resolve().parent / "generated" / "legacy"
|
||||
|
||||
|
||||
def _sanitize(key: str) -> str:
|
||||
return key.replace("/", "_").replace("-", "_")
|
||||
|
||||
|
||||
def _coerce_numeric(frame: pd.DataFrame, columns: Iterable[str]) -> None:
|
||||
for column in columns:
|
||||
if column in frame.columns:
|
||||
frame[column] = pd.to_numeric(frame[column], errors="coerce")
|
||||
|
||||
|
||||
def _extract_alpha(frame: pd.DataFrame) -> pd.Series:
|
||||
if "study/alpha" in frame.columns:
|
||||
return pd.to_numeric(frame["study/alpha"], errors="coerce")
|
||||
if "alpha" in frame.columns:
|
||||
return pd.to_numeric(frame["alpha"], errors="coerce")
|
||||
return pd.Series(np.nan, index=frame.index, dtype=float)
|
||||
|
||||
|
||||
def _extract_mode(frame: pd.DataFrame) -> pd.Series:
|
||||
if "study/mode" in frame.columns:
|
||||
return frame["study/mode"].astype(str).str.strip().str.lower()
|
||||
if "study/no_robust" in frame.columns:
|
||||
no_robust = pd.to_numeric(frame["study/no_robust"], errors="coerce").fillna(0.0)
|
||||
return pd.Series(
|
||||
np.where(no_robust > 0.5, "no_robust", "robust"),
|
||||
index=frame.index,
|
||||
dtype="object",
|
||||
)
|
||||
if "no_robust" in frame.columns:
|
||||
no_robust = (
|
||||
frame["no_robust"].astype(str).str.lower().isin({"1", "true", "yes"})
|
||||
)
|
||||
return pd.Series(
|
||||
np.where(no_robust, "no_robust", "robust"),
|
||||
index=frame.index,
|
||||
dtype="object",
|
||||
)
|
||||
return pd.Series("", index=frame.index, dtype="object")
|
||||
|
||||
|
||||
def _prepare_frame(frame: pd.DataFrame, include_non_finished: bool) -> pd.DataFrame:
    """Filter and normalise the raw benchmark export for aggregation.

    Steps:
      1. Unless ``include_non_finished`` is set, keep only rows whose ``State``
         column (when present) equals ``"finished"`` case-insensitively.
      2. Add derived ``alpha`` and ``mode`` columns.
      3. Keep only rows whose mode is ``robust`` or ``no_robust`` and whose
         alpha is not NaN.
      4. Coerce the known evaluation and training metric columns to numeric.

    Returns a new frame sorted by ``alpha``, ``mode`` with a fresh index; the
    input frame is not modified.
    """
    data = frame.copy()
    if not include_non_finished and "State" in data.columns:
        data = data[data["State"].astype(str).str.lower() == "finished"].copy()

    data["alpha"] = _extract_alpha(data)
    data["mode"] = _extract_mode(data)

    usable = data["mode"].isin({"robust", "no_robust"}) & data["alpha"].notna()
    # Take an explicit copy: the coercion below assigns columns, and doing that
    # on a filtered slice is chained assignment (SettingWithCopyWarning).
    data = data[usable].copy()

    numeric_cols = [
        "eval/revenue_mean",
        "eval/reward_mean",
        "eval/coi_level_mean",
        "eval/coi_leakage_mean",
        "eval/volatility_mean",
        "eval/margin_mean",
        "train/alpha_adv",
        "train/coi_penalty",
        "train/ux_penalty",
        "train/agent_prob",
    ]
    _coerce_numeric(data, numeric_cols)
    return data.sort_values(["alpha", "mode"]).reset_index(drop=True)
|
||||
|
||||
|
||||
def _summary_by_alpha_mode(frame: pd.DataFrame, metrics: list[str]) -> pd.DataFrame:
    """Aggregate ``frame`` per (alpha, mode) pair.

    The result has one row per pair with a ``runs`` count plus, for every
    metric, ``<sanitized>_mean`` and ``<sanitized>_std`` columns, sorted by
    alpha then mode with a fresh index.
    """
    keys = ["alpha", "mode"]
    named_aggs: dict[str, tuple[str, str]] = {"runs": ("mode", "size")}
    for metric in metrics:
        stem = _sanitize(metric)
        named_aggs.update(
            {f"{stem}_mean": (metric, "mean"), f"{stem}_std": (metric, "std")}
        )

    summary = frame.groupby(["alpha", "mode"], as_index=False).agg(**named_aggs)
    ordered = summary.sort_values(keys)
    return ordered.reset_index(drop=True)
|
||||
|
||||
|
||||
def _delta_by_alpha(summary: pd.DataFrame, metrics: list[str]) -> pd.DataFrame:
    """Compute robust-versus-no_robust comparisons for each alpha.

    ``summary`` is expected to carry ``alpha``, ``mode``, ``runs`` and
    ``<sanitized>_mean`` columns. Alphas lacking either mode are skipped. For
    each metric the two means, their difference, and the difference as a
    percentage of the no_robust mean are emitted; the percentage is NaN when
    that mean is zero.
    """
    records: list[dict[str, float]] = []
    for alpha, cell in summary.groupby("alpha", sort=True):
        treated = cell[cell["mode"] == "robust"]
        baseline = cell[cell["mode"] == "no_robust"]
        if treated.empty or baseline.empty:
            continue

        record: dict[str, float] = {
            "alpha": float(alpha),
            "runs_robust": float(treated["runs"].iloc[0]),
            "runs_no_robust": float(baseline["runs"].iloc[0]),
        }
        for metric in metrics:
            stem = _sanitize(metric)
            mean_column = f"{stem}_mean"
            treated_mean = float(treated[mean_column].iloc[0])
            baseline_mean = float(baseline[mean_column].iloc[0])
            gap = treated_mean - baseline_mean
            record[f"{stem}_robust"] = treated_mean
            record[f"{stem}_no_robust"] = baseline_mean
            record[f"{stem}_delta"] = gap
            if baseline_mean == 0:
                record[f"{stem}_delta_pct"] = np.nan
            else:
                record[f"{stem}_delta_pct"] = 100.0 * gap / baseline_mean
        records.append(record)

    return pd.DataFrame(records)
|
||||
|
||||
|
||||
def _pairwise_win_rates(frame: pd.DataFrame) -> pd.DataFrame:
|
||||
rules = {
|
||||
"eval/revenue_mean": "higher",
|
||||
"eval/reward_mean": "higher",
|
||||
"eval/coi_leakage_mean": "lower",
|
||||
"eval/volatility_mean": "lower",
|
||||
}
|
||||
rows: list[dict[str, float]] = []
|
||||
for alpha, alpha_group in frame.groupby("alpha", sort=True):
|
||||
robust = alpha_group[alpha_group["mode"] == "robust"]
|
||||
no_robust = alpha_group[alpha_group["mode"] == "no_robust"]
|
||||
if robust.empty or no_robust.empty:
|
||||
continue
|
||||
|
||||
for metric, direction in rules.items():
|
||||
if metric not in frame.columns:
|
||||
continue
|
||||
robust_values = robust[metric].dropna().to_numpy(dtype=float)
|
||||
no_robust_values = no_robust[metric].dropna().to_numpy(dtype=float)
|
||||
if robust_values.size == 0 or no_robust_values.size == 0:
|
||||
continue
|
||||
|
||||
if direction == "higher":
|
||||
wins = (robust_values[:, None] > no_robust_values[None, :]).sum()
|
||||
else:
|
||||
wins = (robust_values[:, None] < no_robust_values[None, :]).sum()
|
||||
ties = (robust_values[:, None] == no_robust_values[None, :]).sum()
|
||||
total = robust_values.size * no_robust_values.size
|
||||
win_prob = (wins + 0.5 * ties) / total
|
||||
rows.append(
|
||||
{
|
||||
"alpha": float(alpha),
|
||||
"metric": metric,
|
||||
"direction": direction,
|
||||
"wins": int(wins),
|
||||
"ties": int(ties),
|
||||
"total_pairs": int(total),
|
||||
"win_probability": float(win_prob),
|
||||
}
|
||||
)
|
||||
return pd.DataFrame(rows)
|
||||
|
||||
|
||||
def _overall_mode_summary(frame: pd.DataFrame, metrics: list[str]) -> pd.DataFrame:
    """Aggregate ``frame`` per mode across all alphas.

    The result has one row per mode with a ``runs`` count plus, for every
    metric, ``<sanitized>_mean`` and ``<sanitized>_std`` columns, sorted by
    mode.
    """
    named_aggs: dict[str, tuple[str, str]] = {"runs": ("mode", "size")}
    for metric in metrics:
        stem = _sanitize(metric)
        named_aggs.update(
            {f"{stem}_mean": (metric, "mean"), f"{stem}_std": (metric, "std")}
        )
    per_mode = frame.groupby("mode", as_index=False).agg(**named_aggs)
    return per_mode.sort_values("mode")
|
||||
|
||||
|
||||
def _headline_json(overall: pd.DataFrame) -> dict[str, float | str]:
|
||||
if {"robust", "no_robust"} - set(overall["mode"].tolist()):
|
||||
return {"status": "incomplete_modes"}
|
||||
|
||||
robust = overall[overall["mode"] == "robust"].iloc[0]
|
||||
no_robust = overall[overall["mode"] == "no_robust"].iloc[0]
|
||||
|
||||
revenue_delta = float(
|
||||
robust["eval_revenue_mean_mean"] - no_robust["eval_revenue_mean_mean"]
|
||||
)
|
||||
leakage_delta = float(
|
||||
robust["eval_coi_leakage_mean_mean"] - no_robust["eval_coi_leakage_mean_mean"]
|
||||
)
|
||||
return {
|
||||
"status": "ok",
|
||||
"revenue_delta": revenue_delta,
|
||||
"revenue_delta_pct": float(
|
||||
100.0 * revenue_delta / no_robust["eval_revenue_mean_mean"]
|
||||
),
|
||||
"coi_leakage_delta": leakage_delta,
|
||||
"coi_leakage_delta_pct": float(
|
||||
100.0 * leakage_delta / no_robust["eval_coi_leakage_mean_mean"]
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
def run(input_path: Path, output_dir: Path, include_non_finished: bool) -> list[Path]:
    """Process the PPO benchmark CSV and write the summary tables and headline JSON.

    Creates ``output_dir`` if needed, reads ``input_path``, derives four
    tables (alpha/mode summary, per-alpha deltas, pairwise win rates, overall
    mode summary) and a headline dictionary, then writes them as four CSV
    files and one JSON file. All results are computed before anything is
    written.

    Returns the written paths: the CSVs in the order above, then the JSON.
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    frame = _prepare_frame(
        pd.read_csv(input_path), include_non_finished=include_non_finished
    )

    candidate_metrics = (
        "eval/revenue_mean",
        "eval/reward_mean",
        "eval/coi_level_mean",
        "eval/coi_leakage_mean",
        "eval/volatility_mean",
        "eval/margin_mean",
        "train/alpha_adv",
        "train/coi_penalty",
        "train/ux_penalty",
        "train/agent_prob",
    )
    # Only aggregate metrics that the input actually contains.
    metrics = [name for name in candidate_metrics if name in frame.columns]

    alpha_mode = _summary_by_alpha_mode(frame, metrics)
    deltas = _delta_by_alpha(alpha_mode, metrics)
    win_rates = _pairwise_win_rates(frame)
    overall = _overall_mode_summary(frame, metrics)
    headline = _headline_json(overall)

    tables = [
        ("ppo_alpha_mode_summary.csv", alpha_mode),
        ("ppo_alpha_deltas.csv", deltas),
        ("ppo_pairwise_win_rates.csv", win_rates),
        ("ppo_overall_mode_summary.csv", overall),
    ]
    written_paths: list[Path] = []
    for filename, table in tables:
        destination = output_dir / filename
        table.to_csv(destination, index=False)
        written_paths.append(destination)

    headline_path = output_dir / "ppo_headline_summary.json"
    headline_path.write_text(json.dumps(headline, indent=2))
    written_paths.append(headline_path)
    return written_paths
|
||||
|
||||
|
||||
def main() -> None:
    """Command-line entry point: parse options, run the processing, print outputs.

    Options: ``--input`` and ``--output-dir`` (paths with module-level
    defaults) and ``--include-non-finished`` (flag). Each written path is
    printed on its own line.
    """
    cli = argparse.ArgumentParser(
        description="Process PPO benchmark CSV for paper tables"
    )
    cli.add_argument("--input", type=Path, default=_default_input())
    cli.add_argument("--output-dir", type=Path, default=_default_output_dir())
    cli.add_argument("--include-non-finished", action="store_true")
    options = cli.parse_args()

    written = run(
        input_path=options.input,
        output_dir=options.output_dir,
        include_non_finished=bool(options.include_non_finished),
    )
    for written_path in written:
        print(written_path)
|
||||
|
||||
|
||||
# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
@@ -1,63 +0,0 @@
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from scipy import stats
|
||||
|
||||
|
||||
# Locate the finished-runs export of the sweep bundle relative to this file.
root = Path(__file__).resolve().parents[5]
runs = (
    root
    / "engine/studies/results/wandb_sweep_bundles/bundle_20260317_122818/runs_finished.csv"
)

# Restrict to the cohort under study: one sweep, baseline mode, 100 products,
# and eta_ux equal to zero.
df = pd.read_csv(runs)
in_sweep = df["sweep_id"].astype(str) == "i88nw811"
is_baseline = df["study_mode"].astype(str) == "baseline"
has_100_products = pd.to_numeric(df["n_products"], errors="coerce") == 100.0
has_zero_eta_ux = pd.to_numeric(df["eta_ux"], errors="coerce") == 0.0
df = df[in_sweep & is_baseline & has_100_products & has_zero_eta_ux].copy()

# Keep only rows where both the regressor and the response parsed as numbers.
alpha = pd.to_numeric(df["alpha"], errors="coerce")
revenue = pd.to_numeric(df["eval_revenue_mean"], errors="coerce")
mask = alpha.notna() & revenue.notna()
alpha = alpha[mask].to_numpy(dtype=float)
revenue = revenue[mask].to_numpy(dtype=float)

# A slope needs at least three points and two distinct alpha values.
if len(alpha) < 3 or np.unique(alpha).size < 2:
    raise ValueError("Not enough data for regression")

# Ordinary least squares: revenue regressed on alpha, with a two-sided t-test
# on the slope and a 95% confidence interval.
fit = stats.linregress(alpha, revenue)
n = len(alpha)
dof = n - 2
t_stat = fit.slope / fit.stderr
p_val = 2.0 * stats.t.sf(abs(t_stat), df=dof)
r2 = fit.rvalue**2
t_crit = stats.t.ppf(0.975, dof)
slope_margin = t_crit * fit.stderr
slope_ci = (fit.slope - slope_margin, fit.slope + slope_margin)

# Heteroskedasticity-robust (HC1) standard errors from the sandwich estimator
# on the design matrix [1, alpha].
x = np.column_stack([np.ones(n), alpha])
beta = np.linalg.lstsq(x, revenue, rcond=None)[0]
resid = revenue - x @ beta
xtx_inv = np.linalg.pinv(x.T @ x)
weighted_x = x * resid[:, None]
meat = weighted_x.T @ weighted_x
cov_hc1 = (n / (n - x.shape[1])) * (xtx_inv @ meat @ xtx_inv)
se_hc1 = np.sqrt(np.diag(cov_hc1))
t_hc1 = beta[1] / se_hc1[1]
p_hc1 = 2.0 * stats.t.sf(abs(t_hc1), df=dof)
hc1_margin = t_crit * se_hc1[1]
slope_ci_hc1 = (beta[1] - hc1_margin, beta[1] + hc1_margin)

# Report both sets of results.
print("Contamination-Revenue Slope")
print(
    "cohort: bundle_20260317_122818, sweep=i88nw811, mode=baseline, n_products=100, eta_ux=0.0"
)
print(f"n={n}")
print(f"model: revenue = {fit.intercept:.2f} {fit.slope:+.2f} * alpha")
print(
    f"OLS: t({dof})={t_stat:.2f}, p={p_val:.3e}, R^2={r2:.3f}, slope_95CI=[{slope_ci[0]:.2f}, {slope_ci[1]:.2f}]"
)
print(
    f"HC1: t={t_hc1:.2f}, p={p_hc1:.3e}, slope_95CI=[{slope_ci_hc1[0]:.2f}, {slope_ci_hc1[1]:.2f}]"
)
print(f"effect: +0.1 alpha -> {0.1 * fit.slope:.2f} revenue units")
|
||||
Binary file not shown.
Binary file not shown.
Reference in New Issue
Block a user