This commit is contained in:
2026-04-08 11:58:20 +02:00
parent 291472295b
commit e18a3e7363
36 changed files with 28 additions and 1186 deletions

View File

@@ -44,7 +44,7 @@ SWEEP_ENV_LOAD = set -a; [ -f "$(SWEEP_ENV_FILE)" ] && . "$(SWEEP_ENV_FILE)" ||
.PHONY: help
help:
@echo "pdf.build pdf.watch pdf.clean pdf.genpop pdf.genpop.watch pdf.arxiv | test.backend test.e2e test.all | web.dev | install | train | benchmark | benchmark.simple | benchmark.agent | train.agent | train.bootstrap | stats.lines | manim.render manim.render.all"
@echo "pdf.build pdf.watch pdf.clean pdf.genpop pdf.genpop.watch pdf.arxiv | test.backend test.e2e test.all | web.dev | install | train | benchmark | benchmark.simple | benchmark.agent | train.agent | train.bootstrap | stats.lines | manim.defense manim.defense.hq manim.render manim.render.full manim.render.poster manim.render.appendix manim.render.all"
@echo "backend.server backend.provider backend.worker | platform.up platform.down platform.logs | docker.train.publish"
@echo "data.pull data.push data.whoclicked.publish | study.margin-erosion study.margin-erosion.quick study.margin-erosion.plot"
@echo "tpu.ray.bootstrap tpu.ray.deps tpu.ray.verify tpu.ray.teardown"
@@ -235,9 +235,25 @@ count-lines:
all:
@$(NX) run paper:build
.PHONY: manim.render manim.render.all
.PHONY: manim.defense manim.defense.hq manim.render manim.render.full manim.render.poster manim.render.appendix manim.render.all
# Main defense reel (paper/defense/manim/render_defense); uses paper/defense/.venv when present
# manim.defense: runs ./render_defense with the `full` argument from inside paper/defense/manim.
# The `cd ... &&` chain keeps both steps in a single shell, so the script only runs if the cd succeeds.
manim.defense:
@cd paper/defense/manim && ./render_defense full
# manim.defense.hq: same invocation as manim.defense with `--quality qh` appended
# (presumably a higher-quality render preset -- confirm against render_defense).
manim.defense.hq:
@cd paper/defense/manim && ./render_defense full --quality qh
# The manim.render* targets below each forward to `$(NX) run manim:<name>`.
# NOTE(review): $(NX) is not defined in this section -- presumably the Nx CLI invocation set elsewhere
# in the file, with `manim:<name>` addressing a target of the `manim` project; verify.
# manim.render: runs manim:render.
manim.render:
@$(NX) run manim:render
# manim.render.full: runs manim:render-full.
manim.render.full:
@$(NX) run manim:render-full
# manim.render.poster: runs manim:render-poster.
manim.render.poster:
@$(NX) run manim:render-poster
# manim.render.appendix: runs manim:render-appendix.
manim.render.appendix:
@$(NX) run manim:render-appendix
# manim.render.all: runs manim:render-all.
manim.render.all:
@$(NX) run manim:render-all

View File

@@ -1,2 +0,0 @@
manim>=0.18,<1
numpy>=1.24

View File

@@ -304,7 +304,7 @@ $\mathcal{A}_{\text{filter}}$ & \texttt{search}, \texttt{filter\_date}, \texttt{
This partition enables the weight function $\omega$ from Eq.~\ref{eq:qhat} to assign category-specific signal strengths, with $\omega(\mathcal{A}_{\text{cart}}) > \omega(\mathcal{A}_{\text{dwell}}) > \omega(\mathcal{A}_{\text{nav}}) > \omega(\mathcal{A}_{\text{filter}})$ reflecting decreasing commitment.
It is important to acknowledge that this introduces a strong assumption into the weighting; we motivate the scale of each weight by the observed per-category divergence between the behavioral profiles.
In the simulator baseline this order is encoded with a compact fixed scale: cart $=4.0$, dwell $=2.0$, nav $=1.0$, filter $=0.5$. Unknown actions are mapped by prefix heuristics to the nearest category.
We back this up by saying that each weight was assigned by observing an initial small dataset and computing KL divergence between each interaction type; the ones with the highest divergence receive a proportionately high weight in our demand estimation.
We justify this choice empirically: each weight was assigned by observing an initial small dataset and computing the KL divergence between the interaction types; the types with the highest divergence receive a proportionately high weight in our demand estimation. Following the order observed in these divergences, we double the weight at each step, ascending from the lowest weight of $0.5$ for the rare filtering operations.
The metadata record $\mu$ varies by action type. For product views, $\mu$ contains the observed price $p_{\text{obs}}$ and product attributes. For dwell events, $\mu$ includes the element text and accumulated hover duration. This heterogeneous structure is captured via a schema-on-read approach in our Kafka ingestion pipeline, where events are validated against type-specific schemas before storage.

View File

@@ -73,35 +73,35 @@ In our complete training runs we logged $\approx 180$ days of net compute time.
\begin{figure}[ht]
\centering
\input{chapters/figures/results/includes/final/final_focus_revenue_by_alpha.tex}
\input{chapters/figures/results/includes/final_focus_revenue_by_alpha.tex}
\caption{Revenue curves by contamination for the final cohort. The baseline remains above the defended curve in most cells, but the gap narrows in the high-contamination region.}
\label{fig:final_focus_revenue_by_alpha}
\end{figure}
\begin{figure}[ht]
\centering
\input{chapters/figures/results/includes/final/final_focus_coi_by_alpha.tex}
\input{chapters/figures/results/includes/final_focus_coi_by_alpha.tex}
\caption{COI level curves by contamination for the final cohort. The shaded band marks the per-$\alpha$ gap between defended and baseline policies.}
\label{fig:final_focus_coi_by_alpha}
\end{figure}
\begin{figure}[ht]
\centering
\input{chapters/figures/results/includes/final/final_focus_coi_preservation_grid.tex}
\input{chapters/figures/results/includes/final_focus_coi_preservation_grid.tex}
\caption{COI preservation by product count at the contamination endpoints ($\alpha=0.0$ and $\alpha=1.0$). Bars report defended-minus-baseline mean COI level, with the zero line separating preservation from erosion.}
\label{fig:final_focus_coi_preservation_grid}
\end{figure}
\begin{figure}[ht]
\centering
\input{chapters/figures/results/includes/final/final_focus_revenue_delta.tex}
\input{chapters/figures/results/includes/final_focus_revenue_delta.tex}
\caption{Defended-minus-baseline revenue delta over contamination for the final cohort. The strongest high-contamination deviation begins at $\alpha=0.7$, followed by recovery toward near parity by $\alpha=1.0$.}
\label{fig:final_focus_revenue_delta}
\end{figure}
\begin{figure}[ht]
\centering
\input{chapters/figures/results/includes/final/final_focus_risk_deltas.tex}
\input{chapters/figures/results/includes/final_focus_risk_deltas.tex}
\caption{Defended-minus-baseline leakage and volatility deltas for the final cohort. Leakage remains lower for the defended policy across the full contamination range.}
\label{fig:final_focus_risk_deltas}
\end{figure}

View File

@@ -21,4 +21,6 @@ Now we very explicitly mention what we contribute in this paper:
\subsection{Future Works and Next Steps}
During the eight months of research dedicated to this work, a plethora of opportunities and industry gaps were identified, the majority of which, sadly, could not be addressed directly.
In tackling this work we imposed a set of constraints that we hope to relax in future iterations, and we hope that some of them will be addressed by industry. The first of these constraints is the weighting of different actions within the demand estimation, which we would ideally obtain through a learned methodology. Next, the assumption of perfectly alternating turns between the platform and the market calls for a fixed-length, non-strictly alternating state definition with a history of actions, which could allow for the development of multi-agent or multi-platform simulation. In our simulation we also assume a non-perishable supply of items, which creates the biggest sim-to-real gap in our system. We would also like to remove the intra-session stationarity of the contamination parameter in order to create high-fidelity non-stationarity within a single evaluation window.
For deployment, we advise collecting a larger sample of human baselines, complementing it with simulated agentic sessions, and minding the matrix scaling for very large catalog sizes.

View File

@@ -1,10 +0,0 @@
{
"runs": 340,
"tiers": 5,
"alphas": 6,
"status": "ok",
"mean_tier_revenue_robust": 190714.62212212436,
"mean_tier_revenue_no_robust": 197371.17216609977,
"mean_tier_revenue_delta": -6656.5500439754105,
"mean_tier_revenue_delta_pct": -3.3726050116242514
}

View File

@@ -1,31 +0,0 @@
tier,alpha,runs_robust,runs_no_robust,eval_revenue_mean_delta,eval_revenue_mean_delta_pct,eval_reward_mean_delta,eval_reward_mean_delta_pct,eval_coi_level_mean_delta,eval_coi_level_mean_delta_pct,eval_margin_mean_delta,eval_margin_mean_delta_pct,objective_score_delta,objective_score_delta_pct,train_alpha_adv_delta,train_alpha_adv_delta_pct
dqn,0.0,5.0,2.0,-31308.987414117495,-8.73651226889534,-1909.7427407095092,-0.5742991901121623,-2.8982436567700063,-2.1108702433020436,-0.001972064237093285,-0.2116777198290971,-1909.7427407095092,-0.5742991901121623,,
dqn,0.1,8.0,4.0,-7723.542755668925,-2.2789188721535494,-74239.37371836061,-21.063854618469847,1.7435833801418141,1.2859365583872486,0.0011891962142838164,0.1278074871971924,-74239.37371836061,-21.063854618469847,0.17619791666666657,176.19791666666694
dqn,0.25,7.0,3.0,-12344.82818986749,-3.7035466052614323,93154.03627578515,36.06691230407512,0.03214544949867104,0.023426184113378143,1.763733457238459e-05,0.001893256490383175,93154.03627578515,36.06691230407512,0.14530952380952394,58.12380952380958
dqn,0.4,5.0,10.0,-7816.300706216833,-2.4694340725162824,-42362.74668471434,-13.411888482380219,0.6251272343707797,0.4579446603861758,0.0002750615520492605,0.02953644634355915,-42362.74668471434,-13.411888482380219,0.09856666666666747,24.64166666666691
dqn,0.6,5.0,4.0,-16150.011887742497,-5.347485987139731,-28508.74710866122,-10.151356300001888,-0.63306323164079,-0.46056970247177387,-0.00034537433455417155,-0.0370668515552649,-28508.74710866122,-10.151356300001888,0.1361999999999981,22.699999999999644
dqn,0.8,7.0,6.0,-18191.8826663699,-6.440527544692988,-55296.94441124235,-20.19273590083627,-0.796733634735034,-0.579832425016392,-0.0006423984775592029,-0.0689476165584585,-55296.94441124235,-20.19273590083627,0.1532857142857158,19.160714285714512
linear,0.0,9.0,8.0,-14967.67388588126,-4.273413942959129,-20107.23171681742,-6.60039931288617,-0.06127790826209889,-0.04564810574240612,-7.607744079518586e-05,-0.008177885913528719,-20107.23171681742,-6.60039931288617,,
linear,0.1,3.0,5.0,-24531.399901538738,-7.171831328305365,-96669.7835552101,-26.44920711447249,-0.3680976907859872,-0.2733723058172187,-0.0002515287835096469,-0.02702956778346356,-96669.7835552101,-26.44920711447249,,
linear,0.25,6.0,9.0,-14840.859479571285,-4.520682292638562,-26510.179456423968,-8.033117756667396,-0.13734776448131925,-0.10212641096230607,-9.41162442338328e-05,-0.010115001392981545,-26510.179456423968,-8.033117756667396,,
linear,0.4,4.0,11.0,-17196.7642560167,-5.486915251242723,-74520.10209817477,-25.042311510043184,0.12217076984330788,0.09098828726103136,0.00010713887099822461,0.011516865671259795,-74520.10209817477,-25.042311510043184,,
linear,0.6,5.0,3.0,-14284.06615788641,-4.854766876637072,38417.71856593515,14.088596762512362,0.24251461234271687,0.1806530855220358,0.0002606811969937395,0.028024824619509187,38417.71856593515,14.088596762512362,,
linear,0.8,4.0,11.0,-10840.488575784548,-3.933600919557566,15749.581078662042,6.447651726824251,0.028051260535562506,0.020876236575910773,5.361882659971062e-05,0.005763158099097226,15749.581078662042,6.447651726824251,,
qtable,0.0,9.0,8.0,-18644.457288398524,-8.15323701554329,32993.42568058451,20.675688115613053,10.369779227648095,10.682768960780463,0.018566897519637582,2.0803084179092814,32993.42568058451,20.675688115613053,0.11839814814814797,
qtable,0.1,6.0,5.0,-12549.400855549495,-4.616991193742389,-37207.79701261924,-15.336047254435487,0.0884057957559321,0.07703761042583206,-0.01127789819771663,-1.2272540823820444,-37207.79701261924,-15.336047254435487,0.07577777777777787,75.77777777777803
qtable,0.25,6.0,5.0,-1534.3527429780224,-0.5456640130847226,18433.43663451099,7.304472653867784,-0.5776125938941306,-0.45734160960552755,-0.003316338490628068,-0.3584028328803385,18433.43663451099,7.304472653867784,0.1181458333333334,47.258333333333354
qtable,0.4,8.0,6.0,-15146.258176090778,-5.274860187729517,-37364.22587794208,-13.005651205148677,0.4611471727478005,0.3629050099230144,0.0071046453227539,0.7751478467862876,-37364.22587794208,-13.005651205148677,0.11010416666666772,27.52604166666698
qtable,0.6,6.0,6.0,-9577.578548656049,-3.9322693501816666,-19088.152339068736,-9.571307395166029,0.9081750157567683,0.7495917946306662,0.0015520804425310786,0.16838348372043557,-19088.152339068736,-9.571307395166029,0.16983333333333228,28.305555555555333
qtable,0.8,5.0,2.0,-52751.680936846446,-19.699089872409548,-16508.209313987172,-7.589601869470744,-15.022454081083623,-11.215398490282094,-0.007791824761087751,-0.8384414846099099,-16508.209313987172,-7.589601869470744,0.11120000000000174,13.900000000000245
static,0.0,5.0,6.0,-4782.871053113384,-5.233544525848519,14411.4689779756,25.538141347978577,1.307060701942973,1.8731997380823568,0.002537468952847566,0.2911381045328444,14411.4689779756,25.538141347978577,,
static,0.1,8.0,5.0,1629.4524528499896,1.880088900553112,-5347.078589385725,-8.14812684380662,0.3600324838305795,0.5019134064795009,-4.6492644957929485e-05,-0.005316014641356001,-5347.078589385725,-8.14812684380662,,
static,0.25,5.0,6.0,-9938.662276761897,-10.398087633377964,-23616.087243780566,-27.701108621456626,-3.0513860773271233,-4.099238223547561,-0.003519771479853273,-0.40113716461596144,-23616.087243780566,-27.701108621456626,,
static,0.4,3.0,4.0,1850.8400595222774,2.1912497828943436,15058.659457798465,23.67199439061036,3.669612467486587,5.430169778169349,0.006763447803564415,0.7804393835882188,15058.659457798465,23.67199439061036,,
static,0.6,6.0,5.0,1038.893948415236,1.2765037688226162,-6062.864079504681,-9.363144945348399,-1.712609061865976,-2.3996341009364213,-0.0042285583442709385,-0.48362088973179423,-6062.864079504681,-9.363144945348399,,
static,0.8,3.0,7.0,2696.6340631967323,3.6826150812750567,149.22406835677975,0.27280281303997084,0.8491716126507072,1.2427748744725668,0.0032786525965587954,0.3777595573932637,149.22406835677975,0.27280281303997084,,
surge,0.0,6.0,6.0,-606.73760243367,-5.066579306500225,-244.17585425326251,-5.525800641331023,0.014874931199557295,0.09186560988877175,0.0019308940532419272,0.4471794260021321,-244.17585425326251,-5.525800641331023,,
surge,0.1,2.0,5.0,169.78743573408792,1.446343107913299,-1012.7706974660168,-20.02053666691211,-0.14459518037699226,-0.864651254901582,-0.0018650458785858248,-0.4260349899970559,-1012.7706974660168,-20.02053666691211,,
surge,0.25,10.0,7.0,-128.20993816584632,-1.1276930411162496,-81.21373487263281,-1.7081453033360994,0.3008506477195141,1.839047728806548,0.0030750148302954305,0.7102446987902812,-81.21373487263281,-1.7081453033360994,,
surge,0.4,6.0,6.0,-473.03722764431404,-4.297928307550563,28.557452243338048,0.6755106104955642,-0.5027452173053764,-3.072002360121898,-0.005581380442163164,-1.288152985482699,28.557452243338048,0.6755106104955642,,
surge,0.6,2.0,5.0,307.79436325796996,3.0356727142643067,2060.57396030564,63.382050333909866,0.2339650444065704,1.438519400758399,0.001302270025389629,0.30077697380833807,2060.57396030564,63.382050333909866,,
surge,0.8,3.0,3.0,423.15386247993047,4.372210191290083,1117.0942083304312,34.86182570616373,0.8971464536957541,5.327339899805159,0.007068630716831503,1.6094191039618562,1117.0942083304312,34.86182570616373,,
1 tier alpha runs_robust runs_no_robust eval_revenue_mean_delta eval_revenue_mean_delta_pct eval_reward_mean_delta eval_reward_mean_delta_pct eval_coi_level_mean_delta eval_coi_level_mean_delta_pct eval_margin_mean_delta eval_margin_mean_delta_pct objective_score_delta objective_score_delta_pct train_alpha_adv_delta train_alpha_adv_delta_pct
2 dqn 0.0 5.0 2.0 -31308.987414117495 -8.73651226889534 -1909.7427407095092 -0.5742991901121623 -2.8982436567700063 -2.1108702433020436 -0.001972064237093285 -0.2116777198290971 -1909.7427407095092 -0.5742991901121623
3 dqn 0.1 8.0 4.0 -7723.542755668925 -2.2789188721535494 -74239.37371836061 -21.063854618469847 1.7435833801418141 1.2859365583872486 0.0011891962142838164 0.1278074871971924 -74239.37371836061 -21.063854618469847 0.17619791666666657 176.19791666666694
4 dqn 0.25 7.0 3.0 -12344.82818986749 -3.7035466052614323 93154.03627578515 36.06691230407512 0.03214544949867104 0.023426184113378143 1.763733457238459e-05 0.001893256490383175 93154.03627578515 36.06691230407512 0.14530952380952394 58.12380952380958
5 dqn 0.4 5.0 10.0 -7816.300706216833 -2.4694340725162824 -42362.74668471434 -13.411888482380219 0.6251272343707797 0.4579446603861758 0.0002750615520492605 0.02953644634355915 -42362.74668471434 -13.411888482380219 0.09856666666666747 24.64166666666691
6 dqn 0.6 5.0 4.0 -16150.011887742497 -5.347485987139731 -28508.74710866122 -10.151356300001888 -0.63306323164079 -0.46056970247177387 -0.00034537433455417155 -0.0370668515552649 -28508.74710866122 -10.151356300001888 0.1361999999999981 22.699999999999644
7 dqn 0.8 7.0 6.0 -18191.8826663699 -6.440527544692988 -55296.94441124235 -20.19273590083627 -0.796733634735034 -0.579832425016392 -0.0006423984775592029 -0.0689476165584585 -55296.94441124235 -20.19273590083627 0.1532857142857158 19.160714285714512
8 linear 0.0 9.0 8.0 -14967.67388588126 -4.273413942959129 -20107.23171681742 -6.60039931288617 -0.06127790826209889 -0.04564810574240612 -7.607744079518586e-05 -0.008177885913528719 -20107.23171681742 -6.60039931288617
9 linear 0.1 3.0 5.0 -24531.399901538738 -7.171831328305365 -96669.7835552101 -26.44920711447249 -0.3680976907859872 -0.2733723058172187 -0.0002515287835096469 -0.02702956778346356 -96669.7835552101 -26.44920711447249
10 linear 0.25 6.0 9.0 -14840.859479571285 -4.520682292638562 -26510.179456423968 -8.033117756667396 -0.13734776448131925 -0.10212641096230607 -9.41162442338328e-05 -0.010115001392981545 -26510.179456423968 -8.033117756667396
11 linear 0.4 4.0 11.0 -17196.7642560167 -5.486915251242723 -74520.10209817477 -25.042311510043184 0.12217076984330788 0.09098828726103136 0.00010713887099822461 0.011516865671259795 -74520.10209817477 -25.042311510043184
12 linear 0.6 5.0 3.0 -14284.06615788641 -4.854766876637072 38417.71856593515 14.088596762512362 0.24251461234271687 0.1806530855220358 0.0002606811969937395 0.028024824619509187 38417.71856593515 14.088596762512362
13 linear 0.8 4.0 11.0 -10840.488575784548 -3.933600919557566 15749.581078662042 6.447651726824251 0.028051260535562506 0.020876236575910773 5.361882659971062e-05 0.005763158099097226 15749.581078662042 6.447651726824251
14 qtable 0.0 9.0 8.0 -18644.457288398524 -8.15323701554329 32993.42568058451 20.675688115613053 10.369779227648095 10.682768960780463 0.018566897519637582 2.0803084179092814 32993.42568058451 20.675688115613053 0.11839814814814797
15 qtable 0.1 6.0 5.0 -12549.400855549495 -4.616991193742389 -37207.79701261924 -15.336047254435487 0.0884057957559321 0.07703761042583206 -0.01127789819771663 -1.2272540823820444 -37207.79701261924 -15.336047254435487 0.07577777777777787 75.77777777777803
16 qtable 0.25 6.0 5.0 -1534.3527429780224 -0.5456640130847226 18433.43663451099 7.304472653867784 -0.5776125938941306 -0.45734160960552755 -0.003316338490628068 -0.3584028328803385 18433.43663451099 7.304472653867784 0.1181458333333334 47.258333333333354
17 qtable 0.4 8.0 6.0 -15146.258176090778 -5.274860187729517 -37364.22587794208 -13.005651205148677 0.4611471727478005 0.3629050099230144 0.0071046453227539 0.7751478467862876 -37364.22587794208 -13.005651205148677 0.11010416666666772 27.52604166666698
18 qtable 0.6 6.0 6.0 -9577.578548656049 -3.9322693501816666 -19088.152339068736 -9.571307395166029 0.9081750157567683 0.7495917946306662 0.0015520804425310786 0.16838348372043557 -19088.152339068736 -9.571307395166029 0.16983333333333228 28.305555555555333
19 qtable 0.8 5.0 2.0 -52751.680936846446 -19.699089872409548 -16508.209313987172 -7.589601869470744 -15.022454081083623 -11.215398490282094 -0.007791824761087751 -0.8384414846099099 -16508.209313987172 -7.589601869470744 0.11120000000000174 13.900000000000245
20 static 0.0 5.0 6.0 -4782.871053113384 -5.233544525848519 14411.4689779756 25.538141347978577 1.307060701942973 1.8731997380823568 0.002537468952847566 0.2911381045328444 14411.4689779756 25.538141347978577
21 static 0.1 8.0 5.0 1629.4524528499896 1.880088900553112 -5347.078589385725 -8.14812684380662 0.3600324838305795 0.5019134064795009 -4.6492644957929485e-05 -0.005316014641356001 -5347.078589385725 -8.14812684380662
22 static 0.25 5.0 6.0 -9938.662276761897 -10.398087633377964 -23616.087243780566 -27.701108621456626 -3.0513860773271233 -4.099238223547561 -0.003519771479853273 -0.40113716461596144 -23616.087243780566 -27.701108621456626
23 static 0.4 3.0 4.0 1850.8400595222774 2.1912497828943436 15058.659457798465 23.67199439061036 3.669612467486587 5.430169778169349 0.006763447803564415 0.7804393835882188 15058.659457798465 23.67199439061036
24 static 0.6 6.0 5.0 1038.893948415236 1.2765037688226162 -6062.864079504681 -9.363144945348399 -1.712609061865976 -2.3996341009364213 -0.0042285583442709385 -0.48362088973179423 -6062.864079504681 -9.363144945348399
25 static 0.8 3.0 7.0 2696.6340631967323 3.6826150812750567 149.22406835677975 0.27280281303997084 0.8491716126507072 1.2427748744725668 0.0032786525965587954 0.3777595573932637 149.22406835677975 0.27280281303997084
26 surge 0.0 6.0 6.0 -606.73760243367 -5.066579306500225 -244.17585425326251 -5.525800641331023 0.014874931199557295 0.09186560988877175 0.0019308940532419272 0.4471794260021321 -244.17585425326251 -5.525800641331023
27 surge 0.1 2.0 5.0 169.78743573408792 1.446343107913299 -1012.7706974660168 -20.02053666691211 -0.14459518037699226 -0.864651254901582 -0.0018650458785858248 -0.4260349899970559 -1012.7706974660168 -20.02053666691211
28 surge 0.25 10.0 7.0 -128.20993816584632 -1.1276930411162496 -81.21373487263281 -1.7081453033360994 0.3008506477195141 1.839047728806548 0.0030750148302954305 0.7102446987902812 -81.21373487263281 -1.7081453033360994
29 surge 0.4 6.0 6.0 -473.03722764431404 -4.297928307550563 28.557452243338048 0.6755106104955642 -0.5027452173053764 -3.072002360121898 -0.005581380442163164 -1.288152985482699 28.557452243338048 0.6755106104955642
30 surge 0.6 2.0 5.0 307.79436325796996 3.0356727142643067 2060.57396030564 63.382050333909866 0.2339650444065704 1.438519400758399 0.001302270025389629 0.30077697380833807 2060.57396030564 63.382050333909866
31 surge 0.8 3.0 3.0 423.15386247993047 4.372210191290083 1117.0942083304312 34.86182570616373 0.8971464536957541 5.327339899805159 0.007068630716831503 1.6094191039618562 1117.0942083304312 34.86182570616373

View File

@@ -1,61 +0,0 @@
tier,alpha,mode,runs,eval_revenue_mean_mean,eval_revenue_mean_std,eval_reward_mean_mean,eval_reward_mean_std,eval_coi_level_mean_mean,eval_coi_level_mean_std,eval_margin_mean_mean,eval_margin_mean_std,objective_score_mean,objective_score_std,train_alpha_adv_mean,train_alpha_adv_std
dqn,0.0,no_robust,2,358369.40933039243,3531.782519351935,332534.46523867303,114183.5587841961,137.30089123035202,0.8184776440325546,0.9316352418598786,0.0006839003676302996,332534.46523867303,114183.5587841961,,
dqn,0.0,robust,5,327060.42191627494,24311.17412598574,330624.7224979635,62834.39223547943,134.40264757358202,6.160000643680792,0.9296631776227853,0.004262039730140749,330624.7224979635,62834.39223547943,0.17835000000000004,0.08829347371125472
dqn,0.1,no_robust,4,338912.58043645386,19584.736810155388,352449.13650924934,34076.74819101191,135.58860029055563,3.4055508991301524,0.9304589585186211,0.0023438665484978773,352449.13650924934,34076.74819101191,0.0999999999999998,0.0
dqn,0.1,robust,8,331189.03768078494,8060.912085646968,278209.7627908887,57861.69545853692,137.33218367069745,0.43113256118808096,0.931648154732905,0.000296560958972609,278209.7627908887,57861.69545853692,0.2761979166666664,0.09826648189130198
dqn,0.25,no_robust,3,333324.4996115304,6101.717861804452,258281.15112936878,46772.05216097596,137.2201692904545,0.9866477887862672,0.9315871706751672,0.0006356053229300815,258281.15112936878,46772.05216097596,0.25,0.0
dqn,0.25,robust,7,320979.6714216629,7345.8761269427705,351435.18740515393,40320.63699261721,137.25231473995316,0.3527287960309152,0.9316048080097395,0.0002575240668471541,351435.18740515393,40320.63699261721,0.39530952380952394,0.073021206240698
dqn,0.4,no_robust,10,316521.94295076875,3631.1820920182718,315859.66987697606,59129.03566963754,136.50715652926755,0.5085743959240285,0.931261495881483,0.00031280530251053175,315859.66987697606,59129.03566963754,0.3999999999999993,0.0
dqn,0.4,robust,5,308705.6422445519,10654.571556448245,273496.9231922617,68868.59270778317,137.13228376363833,0.9543108715306617,0.9315365574335323,0.0006302636717132419,273496.9231922617,68868.59270778317,0.49856666666666677,0.05745573175159429
dqn,0.6,no_robust,4,302011.2988903938,2354.1141598720183,280836.828756133,58683.00124997926,137.4522093492651,0.4692723362517602,0.9317606434396914,0.0003317518021682495,280836.828756133,58683.00124997926,0.600000000000001,0.0
dqn,0.6,robust,5,285861.2870026513,10386.571631344234,252328.08164747176,59388.56063758225,136.8191461176243,1.0629203361893034,0.9314152691051373,0.0005692783702932289,252328.08164747176,59388.56063758225,0.7361999999999991,0.07108625433623189
dqn,0.8,no_robust,6,282459.51189759385,2625.018247527438,273845.72691287595,66378.16690732416,137.4075681801531,0.29728950101826707,0.9317196295169007,0.00022799290978965786,273845.72691287595,66378.16690732416,0.7999999999999985,0.0
dqn,0.8,robust,7,264267.62923122395,6771.288971321149,218548.7825016336,50043.2009443344,136.61083454541807,1.2319662937254596,0.9310772310393415,0.0010118564779437284,218548.7825016336,50043.2009443344,0.9532857142857143,0.04709817507333055
linear,0.0,no_robust,8,350250.9723061577,3156.286820918861,304636.59490360576,71682.88027353655,134.2397614654424,0.32611787466946035,0.9302824910938235,0.00024020749661685483,304636.59490360576,71682.88027353655,,
linear,0.0,robust,9,335283.29842027643,7707.594869976611,284529.36318678834,55524.58819004573,134.1784835571803,0.4477314164684001,0.9302064136530284,0.00034781034181738526,284529.36318678834,55524.58819004573,,
linear,0.1,no_robust,5,342052.1032713031,2576.546352056584,365492.17954557994,44890.93522299766,134.65068807375954,0.2181027640393531,0.930569018064469,0.00014058935916940913,365492.17954557994,44890.93522299766,,
linear,0.1,robust,3,317520.7033697644,4796.580459456527,268822.39599036984,39256.421140635124,134.28259038297355,0.24570499109363475,0.9303174892809594,0.00018817899183709092,268822.39599036984,39256.421140635124,,
linear,0.25,no_robust,9,328288.0441241802,2178.525494145428,330011.0898339667,38591.36053388808,134.48799697074742,0.2199303973026469,0.9304619997297959,0.00015341642413402035,330011.0898339667,38591.36053388808,,
linear,0.25,robust,6,313447.18464460893,11811.426711620714,303500.9103775427,63358.917144214036,134.3506492062661,0.2947034403278951,0.9303678834855621,0.00021446628431268986,303500.9103775427,63358.917144214036,,
linear,0.4,no_robust,11,313414.0672597746,1982.9537556159262,297576.7714904776,69396.90446617964,134.2708754290745,0.3062093691351849,0.9302780292522507,0.00023067974755288992,297576.7714904776,69396.90446617964,,
linear,0.4,robust,4,296217.3030037579,5109.898340355844,223056.66939230284,38293.73688466607,134.3930461989178,0.12347753686382154,0.9303851681232489,7.324605809708878e-05,223056.66939230284,38293.73688466607,,
linear,0.6,no_robust,3,294227.64307441004,2081.9176570448135,272686.62176604365,66672.50905805513,134.24327165069943,0.30764332256042104,0.9301795837547151,0.00020453921786790446,272686.62176604365,66672.50905805513,,
linear,0.6,robust,5,279943.5769165236,9866.031719660255,311104.3403319788,28363.930707781863,134.48578626304214,0.21280262186464388,0.9304402649517088,0.00020533894868120649,311104.3403319788,28363.930707781863,,
linear,0.8,no_robust,11,275586.89347174135,1618.038877505867,244268.4832547461,56201.44465269986,134.36933631960773,0.2845660213184439,0.9303723007028001,0.00017640716421186918,244268.4832547461,56201.44465269986,,
linear,0.8,robust,4,264746.4048959568,7976.6279174956235,260018.06433340814,57942.49882730146,134.3973875801433,0.31511916357643405,0.9304259195293998,0.00023606570471334208,260018.06433340814,57942.49882730146,,
qtable,0.0,no_robust,8,228675.52179404112,103199.70453252994,159575.94976328663,95848.81008103945,97.07014413321637,33.0637115678536,0.8925069648229078,0.04890522141482132,159575.94976328663,95848.81008103945,0.0,0.0
qtable,0.0,robust,9,210031.0645056426,84361.3834579348,192569.37544387113,116824.7880426837,107.43992336086447,21.41128645838254,0.9110738623425454,0.019188350719133364,192569.37544387113,116824.7880426837,0.11839814814814797,0.061909456985161225
qtable,0.1,no_robust,5,271809.0706466638,14898.209045050968,242616.60384397948,49181.45526408063,114.75666919996793,3.461383158930426,0.9189538140159812,0.002294693249439748,242616.60384397948,49181.45526408063,0.0999999999999998,0.0
qtable,0.1,robust,6,259259.66979111428,102995.29934229614,205408.80683136024,94155.1845420674,114.84507499572386,36.206421837506966,0.9076759158182646,0.048591979839360346,205408.80683136024,94155.1845420674,0.17577777777777767,0.06720562696899951
qtable,0.25,no_robust,5,281190.01916657295,70274.10208723843,252358.2126733039,129868.46825082717,126.29784427276161,15.368804047323954,0.9253103453385114,0.009044883517550522,252358.2126733039,129868.46825082717,0.25,0.0
qtable,0.25,robust,6,279655.6664235949,93056.2549557545,270791.6493078149,116021.46257259768,125.72023167886748,26.760714047253796,0.9219940068478834,0.022785695882060884,270791.6493078149,116021.46257259768,0.3681458333333334,0.08845114686619042
qtable,0.4,no_robust,6,287140.4669895195,32698.16434426399,287292.23388022534,83855.95000252876,127.07104066863859,9.200301166154173,0.9165535777734913,0.01306001923887748,287292.23388022534,83855.95000252876,0.3999999999999993,0.0
qtable,0.4,robust,8,271994.2088134287,79259.3185780895,249928.00800228326,88265.30801790548,127.53218784138639,23.406428094683015,0.9236582230962452,0.020073747007871224,249928.00800228326,88265.30801790548,0.510104166666667,0.09294655989347765
qtable,0.6,no_robust,6,243563.64469828535,67006.60707045678,199430.98211127534,79119.52886604435,121.15594411011905,17.91243944823949,0.9217533740470492,0.011558797825966702,199430.98211127534,79119.52886604435,0.600000000000001,0.0
qtable,0.6,robust,6,233986.0661496293,43155.478617087436,180342.8297722066,48117.79957836251,122.06411912587582,12.160951090203252,0.9233054544895802,0.006840854872863436,180342.8297722066,48117.79957836251,0.7698333333333333,0.09107066853090896
qtable,0.8,no_robust,2,267787.4017455507,1552.038101264713,217510.87340156303,45358.788584678456,133.9448981157492,0.47346860040111405,0.9293224278749692,0.0002998116010539045,217510.87340156303,45358.788584678456,0.7999999999999985,0.0
qtable,0.8,robust,5,215035.72080870424,32869.73253165852,201002.66408757586,63247.67956376057,118.92244403466557,8.586916805142152,0.9215306031138815,0.004644709320891907,201002.66408757586,63247.67956376057,0.9112000000000002,0.07381653307732307
static,0.0,no_robust,6,91388.75248869567,13415.65534300268,56431.15832748852,8525.098185703384,69.77689967440658,3.670744870085874,0.8715688236409825,0.005831496806767582,56431.15832748852,8525.098185703384,,
static,0.0,robust,5,86605.88143558228,7614.909395960895,70842.62730546412,8033.737230392738,71.08396037634955,3.6802889678420283,0.8741062925938301,0.005083911544334936,70842.62730546412,8033.737230392738,,
static,0.1,no_robust,5,86668.90445290186,8037.955688932984,65623.40881389238,19329.448262530004,71.73199185012882,4.199046495412734,0.874577067494122,0.006610505646022198,65623.40881389238,19329.448262530004,,
static,0.1,robust,8,88298.35690575185,9576.838833058617,60276.33022450666,13359.490452744656,72.0920243339594,6.7706096714767865,0.8745305748491641,0.010083585815241344,60276.33022450666,13359.490452744656,,
static,0.25,no_robust,6,95581.63603909909,8345.698435455577,85253.22060752509,13111.526873622026,74.43788116042678,2.1078820386097368,0.8774483618896327,0.0037254791853004897,85253.22060752509,13111.526873622026,,
static,0.25,robust,5,85642.97376233719,9472.880627242153,61637.13336374452,15937.429780623212,71.38649508309966,4.0264905454627264,0.8739285904097794,0.005323853359397925,61637.13336374452,15937.429780623212,,
static,0.4,no_robust,4,84465.04245981346,12101.831388745604,63613.81812329075,7778.361846092061,67.5782271530322,3.9088888968092,0.8666205147756862,0.007149121199217965,63613.81812329075,7778.361846092061,,
static,0.4,robust,3,86315.88251933573,8642.748496122398,78672.47758108922,17823.74997200773,71.24783962051879,2.790416943786253,0.8733839625792507,0.005990544453538607,78672.47758108922,17823.74997200773,,
static,0.6,no_robust,5,81385.88962988024,12343.523894997037,64752.43216774836,23486.779472906223,71.36959177224794,5.100226704959064,0.874353948320141,0.007787250295491337,64752.43216774836,23486.779472906223,,
static,0.6,robust,6,82424.78357829548,9831.886701625144,58689.56808824368,12672.506035553573,69.65698271038197,3.484982360048201,0.8701253899758701,0.005917711231889304,58689.56808824368,12672.506035553573,,
static,0.8,no_robust,7,73226.06364450825,4447.877985963851,54700.340767716196,14406.881298569717,68.32867561883204,3.68262917356943,0.8679204886788817,0.007467501164611224,54700.340767716196,14406.881298569717,,
static,0.8,robust,3,75922.69770770498,5046.089536162847,54849.564836072976,22780.98012221352,69.17784723148274,1.5268167784698885,0.8711991412754405,0.0033278715575433297,54849.564836072976,22780.98012221352,,
surge,0.0,no_robust,6,11975.290738176132,411.4052900076416,4418.832131346071,896.5828048394391,16.192056219479124,0.8040364003224534,0.4317940274006973,0.008271862690929055,4418.832131346071,896.5828048394391,,
surge,0.0,robust,6,11368.553135742462,623.8217438159004,4174.6562770928085,639.9963040241264,16.20693115067868,0.9853827520149101,0.4337249214539392,0.010371668289035135,4174.6562770928085,639.9963040241264,,
surge,0.1,no_robust,5,11739.084232858655,332.778792718381,5058.659087494994,1110.8409258976824,16.722948073839394,0.6578121995950104,0.4377682402562083,0.005683401047550787,5058.659087494994,1110.8409258976824,,
surge,0.1,robust,2,11908.871668592743,81.41250285550258,4045.8883900289775,784.7169500268457,16.5783528934624,0.4088194924856508,0.4359031943776225,0.004531137621699143,4045.8883900289775,784.7169500268457,,
surge,0.25,no_robust,7,11369.223138855004,236.1121240061105,4754.4980344481255,1038.0550037539617,16.359045119223275,0.3945156775653057,0.4329514652531622,0.0038762110261952457,4754.4980344481255,1038.0550037539617,,
surge,0.25,robust,10,11241.013200689158,684.503587066406,4673.284299575493,1187.78635131025,16.65989576694279,1.0515950311117155,0.4360264800834576,0.009701952962125513,4673.284299575493,1187.78635131025,,
surge,0.4,no_robust,6,11006.168409400554,364.6584583108646,4227.535704048808,1414.7964077877168,16.365391636138824,0.9138430058543858,0.4332855262584901,0.008024003783434592,4227.535704048808,1414.7964077877168,,
surge,0.4,robust,6,10533.13118175624,526.0758051960169,4256.093156292146,783.7965507386594,15.862646418833448,0.7732699435426456,0.42770414581632693,0.008967505611725135,4256.093156292146,783.7965507386594,,
surge,0.6,no_robust,5,10139.2472848498,97.448078425168,3251.037082975553,742.2100315641153,16.26429537781848,0.4432465691073604,0.4329686574409998,0.004121820888165019,3251.037082975553,742.2100315641153,,
surge,0.6,robust,2,10447.04164810777,524.0029334247373,5311.611043281193,1808.6200710093085,16.49826042222505,0.6088756908260344,0.43427092746638946,0.007817511630542989,5311.611043281193,1808.6200710093085,,
surge,0.8,no_robust,3,9678.259826640971,272.83530913170915,3204.3479815026553,556.8799617962688,16.840420745981802,0.4589959822922529,0.43920385308157944,0.004953937449529005,3204.3479815026553,556.8799617962688,,
surge,0.8,robust,3,10101.413689120902,526.8318040489241,4321.442189833087,1284.166148011517,17.737567199677557,0.6586775330563983,0.44627248379841095,0.004644261847052545,4321.442189833087,1284.166148011517,,
1 tier alpha mode runs eval_revenue_mean_mean eval_revenue_mean_std eval_reward_mean_mean eval_reward_mean_std eval_coi_level_mean_mean eval_coi_level_mean_std eval_margin_mean_mean eval_margin_mean_std objective_score_mean objective_score_std train_alpha_adv_mean train_alpha_adv_std
2 dqn 0.0 no_robust 2 358369.40933039243 3531.782519351935 332534.46523867303 114183.5587841961 137.30089123035202 0.8184776440325546 0.9316352418598786 0.0006839003676302996 332534.46523867303 114183.5587841961
3 dqn 0.0 robust 5 327060.42191627494 24311.17412598574 330624.7224979635 62834.39223547943 134.40264757358202 6.160000643680792 0.9296631776227853 0.004262039730140749 330624.7224979635 62834.39223547943 0.17835000000000004 0.08829347371125472
4 dqn 0.1 no_robust 4 338912.58043645386 19584.736810155388 352449.13650924934 34076.74819101191 135.58860029055563 3.4055508991301524 0.9304589585186211 0.0023438665484978773 352449.13650924934 34076.74819101191 0.0999999999999998 0.0
5 dqn 0.1 robust 8 331189.03768078494 8060.912085646968 278209.7627908887 57861.69545853692 137.33218367069745 0.43113256118808096 0.931648154732905 0.000296560958972609 278209.7627908887 57861.69545853692 0.2761979166666664 0.09826648189130198
6 dqn 0.25 no_robust 3 333324.4996115304 6101.717861804452 258281.15112936878 46772.05216097596 137.2201692904545 0.9866477887862672 0.9315871706751672 0.0006356053229300815 258281.15112936878 46772.05216097596 0.25 0.0
7 dqn 0.25 robust 7 320979.6714216629 7345.8761269427705 351435.18740515393 40320.63699261721 137.25231473995316 0.3527287960309152 0.9316048080097395 0.0002575240668471541 351435.18740515393 40320.63699261721 0.39530952380952394 0.073021206240698
8 dqn 0.4 no_robust 10 316521.94295076875 3631.1820920182718 315859.66987697606 59129.03566963754 136.50715652926755 0.5085743959240285 0.931261495881483 0.00031280530251053175 315859.66987697606 59129.03566963754 0.3999999999999993 0.0
9 dqn 0.4 robust 5 308705.6422445519 10654.571556448245 273496.9231922617 68868.59270778317 137.13228376363833 0.9543108715306617 0.9315365574335323 0.0006302636717132419 273496.9231922617 68868.59270778317 0.49856666666666677 0.05745573175159429
10 dqn 0.6 no_robust 4 302011.2988903938 2354.1141598720183 280836.828756133 58683.00124997926 137.4522093492651 0.4692723362517602 0.9317606434396914 0.0003317518021682495 280836.828756133 58683.00124997926 0.600000000000001 0.0
11 dqn 0.6 robust 5 285861.2870026513 10386.571631344234 252328.08164747176 59388.56063758225 136.8191461176243 1.0629203361893034 0.9314152691051373 0.0005692783702932289 252328.08164747176 59388.56063758225 0.7361999999999991 0.07108625433623189
12 dqn 0.8 no_robust 6 282459.51189759385 2625.018247527438 273845.72691287595 66378.16690732416 137.4075681801531 0.29728950101826707 0.9317196295169007 0.00022799290978965786 273845.72691287595 66378.16690732416 0.7999999999999985 0.0
13 dqn 0.8 robust 7 264267.62923122395 6771.288971321149 218548.7825016336 50043.2009443344 136.61083454541807 1.2319662937254596 0.9310772310393415 0.0010118564779437284 218548.7825016336 50043.2009443344 0.9532857142857143 0.04709817507333055
14 linear 0.0 no_robust 8 350250.9723061577 3156.286820918861 304636.59490360576 71682.88027353655 134.2397614654424 0.32611787466946035 0.9302824910938235 0.00024020749661685483 304636.59490360576 71682.88027353655
15 linear 0.0 robust 9 335283.29842027643 7707.594869976611 284529.36318678834 55524.58819004573 134.1784835571803 0.4477314164684001 0.9302064136530284 0.00034781034181738526 284529.36318678834 55524.58819004573
16 linear 0.1 no_robust 5 342052.1032713031 2576.546352056584 365492.17954557994 44890.93522299766 134.65068807375954 0.2181027640393531 0.930569018064469 0.00014058935916940913 365492.17954557994 44890.93522299766
17 linear 0.1 robust 3 317520.7033697644 4796.580459456527 268822.39599036984 39256.421140635124 134.28259038297355 0.24570499109363475 0.9303174892809594 0.00018817899183709092 268822.39599036984 39256.421140635124
18 linear 0.25 no_robust 9 328288.0441241802 2178.525494145428 330011.0898339667 38591.36053388808 134.48799697074742 0.2199303973026469 0.9304619997297959 0.00015341642413402035 330011.0898339667 38591.36053388808
19 linear 0.25 robust 6 313447.18464460893 11811.426711620714 303500.9103775427 63358.917144214036 134.3506492062661 0.2947034403278951 0.9303678834855621 0.00021446628431268986 303500.9103775427 63358.917144214036
20 linear 0.4 no_robust 11 313414.0672597746 1982.9537556159262 297576.7714904776 69396.90446617964 134.2708754290745 0.3062093691351849 0.9302780292522507 0.00023067974755288992 297576.7714904776 69396.90446617964
21 linear 0.4 robust 4 296217.3030037579 5109.898340355844 223056.66939230284 38293.73688466607 134.3930461989178 0.12347753686382154 0.9303851681232489 7.324605809708878e-05 223056.66939230284 38293.73688466607
22 linear 0.6 no_robust 3 294227.64307441004 2081.9176570448135 272686.62176604365 66672.50905805513 134.24327165069943 0.30764332256042104 0.9301795837547151 0.00020453921786790446 272686.62176604365 66672.50905805513
23 linear 0.6 robust 5 279943.5769165236 9866.031719660255 311104.3403319788 28363.930707781863 134.48578626304214 0.21280262186464388 0.9304402649517088 0.00020533894868120649 311104.3403319788 28363.930707781863
24 linear 0.8 no_robust 11 275586.89347174135 1618.038877505867 244268.4832547461 56201.44465269986 134.36933631960773 0.2845660213184439 0.9303723007028001 0.00017640716421186918 244268.4832547461 56201.44465269986
25 linear 0.8 robust 4 264746.4048959568 7976.6279174956235 260018.06433340814 57942.49882730146 134.3973875801433 0.31511916357643405 0.9304259195293998 0.00023606570471334208 260018.06433340814 57942.49882730146
26 qtable 0.0 no_robust 8 228675.52179404112 103199.70453252994 159575.94976328663 95848.81008103945 97.07014413321637 33.0637115678536 0.8925069648229078 0.04890522141482132 159575.94976328663 95848.81008103945 0.0 0.0
27 qtable 0.0 robust 9 210031.0645056426 84361.3834579348 192569.37544387113 116824.7880426837 107.43992336086447 21.41128645838254 0.9110738623425454 0.019188350719133364 192569.37544387113 116824.7880426837 0.11839814814814797 0.061909456985161225
28 qtable 0.1 no_robust 5 271809.0706466638 14898.209045050968 242616.60384397948 49181.45526408063 114.75666919996793 3.461383158930426 0.9189538140159812 0.002294693249439748 242616.60384397948 49181.45526408063 0.0999999999999998 0.0
29 qtable 0.1 robust 6 259259.66979111428 102995.29934229614 205408.80683136024 94155.1845420674 114.84507499572386 36.206421837506966 0.9076759158182646 0.048591979839360346 205408.80683136024 94155.1845420674 0.17577777777777767 0.06720562696899951
30 qtable 0.25 no_robust 5 281190.01916657295 70274.10208723843 252358.2126733039 129868.46825082717 126.29784427276161 15.368804047323954 0.9253103453385114 0.009044883517550522 252358.2126733039 129868.46825082717 0.25 0.0
31 qtable 0.25 robust 6 279655.6664235949 93056.2549557545 270791.6493078149 116021.46257259768 125.72023167886748 26.760714047253796 0.9219940068478834 0.022785695882060884 270791.6493078149 116021.46257259768 0.3681458333333334 0.08845114686619042
32 qtable 0.4 no_robust 6 287140.4669895195 32698.16434426399 287292.23388022534 83855.95000252876 127.07104066863859 9.200301166154173 0.9165535777734913 0.01306001923887748 287292.23388022534 83855.95000252876 0.3999999999999993 0.0
33 qtable 0.4 robust 8 271994.2088134287 79259.3185780895 249928.00800228326 88265.30801790548 127.53218784138639 23.406428094683015 0.9236582230962452 0.020073747007871224 249928.00800228326 88265.30801790548 0.510104166666667 0.09294655989347765
34 qtable 0.6 no_robust 6 243563.64469828535 67006.60707045678 199430.98211127534 79119.52886604435 121.15594411011905 17.91243944823949 0.9217533740470492 0.011558797825966702 199430.98211127534 79119.52886604435 0.600000000000001 0.0
35 qtable 0.6 robust 6 233986.0661496293 43155.478617087436 180342.8297722066 48117.79957836251 122.06411912587582 12.160951090203252 0.9233054544895802 0.006840854872863436 180342.8297722066 48117.79957836251 0.7698333333333333 0.09107066853090896
36 qtable 0.8 no_robust 2 267787.4017455507 1552.038101264713 217510.87340156303 45358.788584678456 133.9448981157492 0.47346860040111405 0.9293224278749692 0.0002998116010539045 217510.87340156303 45358.788584678456 0.7999999999999985 0.0
37 qtable 0.8 robust 5 215035.72080870424 32869.73253165852 201002.66408757586 63247.67956376057 118.92244403466557 8.586916805142152 0.9215306031138815 0.004644709320891907 201002.66408757586 63247.67956376057 0.9112000000000002 0.07381653307732307
38 static 0.0 no_robust 6 91388.75248869567 13415.65534300268 56431.15832748852 8525.098185703384 69.77689967440658 3.670744870085874 0.8715688236409825 0.005831496806767582 56431.15832748852 8525.098185703384
39 static 0.0 robust 5 86605.88143558228 7614.909395960895 70842.62730546412 8033.737230392738 71.08396037634955 3.6802889678420283 0.8741062925938301 0.005083911544334936 70842.62730546412 8033.737230392738
40 static 0.1 no_robust 5 86668.90445290186 8037.955688932984 65623.40881389238 19329.448262530004 71.73199185012882 4.199046495412734 0.874577067494122 0.006610505646022198 65623.40881389238 19329.448262530004
41 static 0.1 robust 8 88298.35690575185 9576.838833058617 60276.33022450666 13359.490452744656 72.0920243339594 6.7706096714767865 0.8745305748491641 0.010083585815241344 60276.33022450666 13359.490452744656
42 static 0.25 no_robust 6 95581.63603909909 8345.698435455577 85253.22060752509 13111.526873622026 74.43788116042678 2.1078820386097368 0.8774483618896327 0.0037254791853004897 85253.22060752509 13111.526873622026
43 static 0.25 robust 5 85642.97376233719 9472.880627242153 61637.13336374452 15937.429780623212 71.38649508309966 4.0264905454627264 0.8739285904097794 0.005323853359397925 61637.13336374452 15937.429780623212
44 static 0.4 no_robust 4 84465.04245981346 12101.831388745604 63613.81812329075 7778.361846092061 67.5782271530322 3.9088888968092 0.8666205147756862 0.007149121199217965 63613.81812329075 7778.361846092061
45 static 0.4 robust 3 86315.88251933573 8642.748496122398 78672.47758108922 17823.74997200773 71.24783962051879 2.790416943786253 0.8733839625792507 0.005990544453538607 78672.47758108922 17823.74997200773
46 static 0.6 no_robust 5 81385.88962988024 12343.523894997037 64752.43216774836 23486.779472906223 71.36959177224794 5.100226704959064 0.874353948320141 0.007787250295491337 64752.43216774836 23486.779472906223
47 static 0.6 robust 6 82424.78357829548 9831.886701625144 58689.56808824368 12672.506035553573 69.65698271038197 3.484982360048201 0.8701253899758701 0.005917711231889304 58689.56808824368 12672.506035553573
48 static 0.8 no_robust 7 73226.06364450825 4447.877985963851 54700.340767716196 14406.881298569717 68.32867561883204 3.68262917356943 0.8679204886788817 0.007467501164611224 54700.340767716196 14406.881298569717
49 static 0.8 robust 3 75922.69770770498 5046.089536162847 54849.564836072976 22780.98012221352 69.17784723148274 1.5268167784698885 0.8711991412754405 0.0033278715575433297 54849.564836072976 22780.98012221352
50 surge 0.0 no_robust 6 11975.290738176132 411.4052900076416 4418.832131346071 896.5828048394391 16.192056219479124 0.8040364003224534 0.4317940274006973 0.008271862690929055 4418.832131346071 896.5828048394391
51 surge 0.0 robust 6 11368.553135742462 623.8217438159004 4174.6562770928085 639.9963040241264 16.20693115067868 0.9853827520149101 0.4337249214539392 0.010371668289035135 4174.6562770928085 639.9963040241264
52 surge 0.1 no_robust 5 11739.084232858655 332.778792718381 5058.659087494994 1110.8409258976824 16.722948073839394 0.6578121995950104 0.4377682402562083 0.005683401047550787 5058.659087494994 1110.8409258976824
53 surge 0.1 robust 2 11908.871668592743 81.41250285550258 4045.8883900289775 784.7169500268457 16.5783528934624 0.4088194924856508 0.4359031943776225 0.004531137621699143 4045.8883900289775 784.7169500268457
54 surge 0.25 no_robust 7 11369.223138855004 236.1121240061105 4754.4980344481255 1038.0550037539617 16.359045119223275 0.3945156775653057 0.4329514652531622 0.0038762110261952457 4754.4980344481255 1038.0550037539617
55 surge 0.25 robust 10 11241.013200689158 684.503587066406 4673.284299575493 1187.78635131025 16.65989576694279 1.0515950311117155 0.4360264800834576 0.009701952962125513 4673.284299575493 1187.78635131025
56 surge 0.4 no_robust 6 11006.168409400554 364.6584583108646 4227.535704048808 1414.7964077877168 16.365391636138824 0.9138430058543858 0.4332855262584901 0.008024003783434592 4227.535704048808 1414.7964077877168
57 surge 0.4 robust 6 10533.13118175624 526.0758051960169 4256.093156292146 783.7965507386594 15.862646418833448 0.7732699435426456 0.42770414581632693 0.008967505611725135 4256.093156292146 783.7965507386594
58 surge 0.6 no_robust 5 10139.2472848498 97.448078425168 3251.037082975553 742.2100315641153 16.26429537781848 0.4432465691073604 0.4329686574409998 0.004121820888165019 3251.037082975553 742.2100315641153
59 surge 0.6 robust 2 10447.04164810777 524.0029334247373 5311.611043281193 1808.6200710093085 16.49826042222505 0.6088756908260344 0.43427092746638946 0.007817511630542989 5311.611043281193 1808.6200710093085
60 surge 0.8 no_robust 3 9678.259826640971 272.83530913170915 3204.3479815026553 556.8799617962688 16.840420745981802 0.4589959822922529 0.43920385308157944 0.004953937449529005 3204.3479815026553 556.8799617962688
61 surge 0.8 robust 3 10101.413689120902 526.8318040489241 4321.442189833087 1284.166148011517 17.737567199677557 0.6586775330563983 0.44627248379841095 0.004644261847052545 4321.442189833087 1284.166148011517

View File

@@ -1,11 +0,0 @@
tier,mode,runs,eval_revenue_mean_mean,eval_revenue_mean_std,eval_reward_mean_mean,eval_reward_mean_std,eval_coi_level_mean_mean,eval_coi_level_mean_std,eval_margin_mean_mean,eval_margin_mean_std,objective_score_mean,objective_score_std,train_alpha_adv_mean,train_alpha_adv_std
dqn,no_robust,29,315185.66674813855,23538.781000060844,302576.8036266896,62951.88633145167,136.82560356086017,1.3692652218935986,0.9313739013618878,0.0009314135057224836,302576.8036266896,62951.88633145167,0.45740740740740693,0.2368477698794438
dqn,robust,37,306875.13950902375,27585.74444520695,283724.7169827867,69843.05611741856,136.68837571992978,2.3797541654948753,0.9312171495138941,0.0016512408492580111,283724.7169827867,69843.05611741856,0.5058198198198196,0.28324483129860284
linear,no_robust,47,315501.15296155965,27105.014861872147,298149.1730416604,67664.7308344108,134.36884359609928,0.29743647613433244,0.9303607531364,0.0002152647006739543,298149.1730416604,67664.7308344108,,
linear,robust,31,306269.9232239004,26399.875293394463,279872.824370329,54401.104602086416,134.32737693008372,0.31909212993628877,0.9303375215162144,0.00025000448833182963,279872.824370329,54401.104602086416,,
qtable,no_robust,32,259818.72178238883,67188.58622318009,222088.83510765125,94450.12569617687,116.84641954166946,22.42810298937963,0.9140582213134033,0.02778864370791322,222088.83510765125,94450.12569617687,0.29218749999999993,0.2559326319498438
qtable,robust,40,244470.50673219413,78666.30912808319,216920.53697298188,93983.50987622296,118.94013969887506,23.1428303249914,0.9178608956089163,0.023827311253270544,216920.53697298188,93983.50987622296,0.4396239583333334,0.29521865862482416
static,no_robust,33,85228.452028227,12041.415672002751,64828.579890468536,17681.280330831738,70.58818912317687,4.204964531595236,0.8721419294578765,0.007107262779462876,64828.579890468536,17681.280330831738,,
static,robust,30,84963.18577955024,8926.291379160475,63243.76603076817,14880.924342692271,70.94358095957392,4.363134562111469,0.8730306888410219,0.006660289247744752,63243.76603076817,14880.924342692271,,
surge,no_robust,32,11121.867310184698,809.9895800277001,4260.038064073964,1160.4282377968032,16.416108827015794,0.641203520341943,0.43413855082681374,0.006214799767130059,4260.038064073964,1160.4282377968032,,
surge,robust,29,10994.355365953365,750.5115890942825,4448.160863178768,1000.7519971246122,16.495943148858906,0.9823026347466668,0.4347587896392907,0.009698591291108968,4448.160863178768,1000.7519971246122,,
1 tier mode runs eval_revenue_mean_mean eval_revenue_mean_std eval_reward_mean_mean eval_reward_mean_std eval_coi_level_mean_mean eval_coi_level_mean_std eval_margin_mean_mean eval_margin_mean_std objective_score_mean objective_score_std train_alpha_adv_mean train_alpha_adv_std
2 dqn no_robust 29 315185.66674813855 23538.781000060844 302576.8036266896 62951.88633145167 136.82560356086017 1.3692652218935986 0.9313739013618878 0.0009314135057224836 302576.8036266896 62951.88633145167 0.45740740740740693 0.2368477698794438
3 dqn robust 37 306875.13950902375 27585.74444520695 283724.7169827867 69843.05611741856 136.68837571992978 2.3797541654948753 0.9312171495138941 0.0016512408492580111 283724.7169827867 69843.05611741856 0.5058198198198196 0.28324483129860284
4 linear no_robust 47 315501.15296155965 27105.014861872147 298149.1730416604 67664.7308344108 134.36884359609928 0.29743647613433244 0.9303607531364 0.0002152647006739543 298149.1730416604 67664.7308344108
5 linear robust 31 306269.9232239004 26399.875293394463 279872.824370329 54401.104602086416 134.32737693008372 0.31909212993628877 0.9303375215162144 0.00025000448833182963 279872.824370329 54401.104602086416
6 qtable no_robust 32 259818.72178238883 67188.58622318009 222088.83510765125 94450.12569617687 116.84641954166946 22.42810298937963 0.9140582213134033 0.02778864370791322 222088.83510765125 94450.12569617687 0.29218749999999993 0.2559326319498438
7 qtable robust 40 244470.50673219413 78666.30912808319 216920.53697298188 93983.50987622296 118.94013969887506 23.1428303249914 0.9178608956089163 0.023827311253270544 216920.53697298188 93983.50987622296 0.4396239583333334 0.29521865862482416
8 static no_robust 33 85228.452028227 12041.415672002751 64828.579890468536 17681.280330831738 70.58818912317687 4.204964531595236 0.8721419294578765 0.007107262779462876 64828.579890468536 17681.280330831738
9 static robust 30 84963.18577955024 8926.291379160475 63243.76603076817 14880.924342692271 70.94358095957392 4.363134562111469 0.8730306888410219 0.006660289247744752 63243.76603076817 14880.924342692271
10 surge no_robust 32 11121.867310184698 809.9895800277001 4260.038064073964 1160.4282377968032 16.416108827015794 0.641203520341943 0.43413855082681374 0.006214799767130059 4260.038064073964 1160.4282377968032
11 surge robust 29 10994.355365953365 750.5115890942825 4448.160863178768 1000.7519971246122 16.495943148858906 0.9823026347466668 0.4347587896392907 0.009698591291108968 4448.160863178768 1000.7519971246122

View File

@@ -1,26 +0,0 @@
Name,tier,alpha,mode,objective/score,eval/revenue_mean,eval/reward_mean,eval/coi_level_mean,lambda_coi,robust_radius,learning_rate,batch_size,n_steps,total_timesteps
eager-sweep-244,dqn,0.0,no_robust,413274.4339549909,355872.06196128257,413274.4339549909,136.722140138007,0.2,0.1,0.0003,256,4096,15000
efficient-sweep-319,linear,0.0,no_robust,410094.0151741567,353309.5198146561,410094.0151741567,134.55152038805429,0.4,0.1,0.001,128,4096,15000
swept-sweep-422,linear,0.0,no_robust,403130.32747386186,347611.2815474988,403130.32747386186,133.8559785775022,0.4,0.3,0.0001,512,1024,15000
decent-sweep-478,linear,0.1,no_robust,400452.36418713134,345284.5750647792,400452.36418713134,134.73082941975588,0.1,0.2,0.001,128,1024,50000
eternal-sweep-339,linear,0.1,no_robust,399628.4231731644,344154.38525771734,399628.4231731644,134.89479277649667,0.4,0.1,0.0001,256,1024,50000
ethereal-sweep-21,dqn,0.1,no_robust,398492.807245857,343580.6802427996,398492.807245857,136.67160732585188,0.1,0.2,0.001,512,2048,50000
dark-sweep-418,linear,0.1,no_robust,394615.3720658343,339749.76272695075,394615.3720658343,134.39233246711,0.2,0.1,0.0003,256,1024,50000
wandering-sweep-122,dqn,0.0,robust,394061.3617726404,339512.43434806296,394061.3617726404,137.6864755964331,0.1,0.3,0.0001,256,2048,30000
laced-sweep-132,dqn,0.1,robust,389274.54998495104,335600.5979215904,389274.54998495104,137.36888574027677,0.4,0.2,0.001,256,2048,30000
rich-sweep-53,qtable,0.0,robust,388601.2626147048,335630.6853337664,388601.2626147048,133.4414069888203,0.2,0.1,0.0001,512,1024,50000
faithful-sweep-430,qtable,0.25,no_robust,387035.6970938766,333255.5771210341,387035.6970938766,137.4906091183188,0.1,0.2,0.0003,128,1024,15000
dark-sweep-280,qtable,0.25,no_robust,386318.8845004527,332220.0316564078,386318.8845004527,137.26992450099925,0.4,0.1,0.0001,256,1024,50000
chocolate-sweep-383,linear,0.25,no_robust,383989.49015403807,331071.7003244704,383989.49015403807,134.60590742050857,0.1,0.2,0.001,512,1024,30000
dry-sweep-263,dqn,0.0,robust,383372.6880637367,330436.0312615148,383372.6880637367,137.40558130223476,0.1,0.3,0.001,128,1024,50000
different-sweep-143,qtable,0.0,robust,383278.4198015018,330546.16800945485,383278.4198015018,135.9021538079678,0.1,0.3,0.001,256,2048,30000
woven-sweep-139,dqn,0.25,robust,382788.1296637251,329427.735752473,382788.1296637251,136.8968339394894,0.1,0.1,0.001,512,1024,15000
dark-sweep-215,dqn,0.25,robust,382358.2401374872,329330.0097603144,382358.2401374872,137.64528612332785,0.2,0.1,0.0001,512,4096,30000
charmed-sweep-136,linear,0.25,no_robust,382249.5728044314,329646.2053260979,382249.5728044314,134.46825608007862,0.4,0.1,0.0001,256,2048,15000
light-sweep-308,linear,0.0,robust,381939.1275250679,329628.9436641051,381939.1275250679,133.6209821974879,0.2,0.2,0.001,128,4096,30000
treasured-sweep-325,linear,0.25,robust,381322.0104772589,328353.58675398555,381322.0104772589,134.8950293943581,0.1,0.1,0.0001,512,2048,15000
fine-sweep-202,dqn,0.25,robust,378751.33572275366,326518.9068184018,378751.33572275366,137.2900973301052,0.1,0.2,0.0001,512,2048,30000
treasured-sweep-380,linear,0.25,no_robust,377898.0979419424,325869.1953595453,377898.0979419424,134.54118723889738,0.4,0.3,0.001,128,1024,50000
pretty-sweep-49,qtable,0.25,robust,377318.4766808995,325282.0152823859,377318.4766808995,137.19609012644068,0.4,0.1,0.0001,128,4096,50000
desert-sweep-253,linear,0.25,robust,376808.6335063269,325146.3478714648,376808.6335063269,134.48396340732663,0.2,0.1,0.0003,256,1024,30000
jolly-sweep-133,qtable,0.4,no_robust,376419.57394710975,323709.24588324485,376419.57394710975,137.8349363778071,0.1,0.3,0.0001,128,2048,50000
1 Name tier alpha mode objective/score eval/revenue_mean eval/reward_mean eval/coi_level_mean lambda_coi robust_radius learning_rate batch_size n_steps total_timesteps
2 eager-sweep-244 dqn 0.0 no_robust 413274.4339549909 355872.06196128257 413274.4339549909 136.722140138007 0.2 0.1 0.0003 256 4096 15000
3 efficient-sweep-319 linear 0.0 no_robust 410094.0151741567 353309.5198146561 410094.0151741567 134.55152038805429 0.4 0.1 0.001 128 4096 15000
4 swept-sweep-422 linear 0.0 no_robust 403130.32747386186 347611.2815474988 403130.32747386186 133.8559785775022 0.4 0.3 0.0001 512 1024 15000
5 decent-sweep-478 linear 0.1 no_robust 400452.36418713134 345284.5750647792 400452.36418713134 134.73082941975588 0.1 0.2 0.001 128 1024 50000
6 eternal-sweep-339 linear 0.1 no_robust 399628.4231731644 344154.38525771734 399628.4231731644 134.89479277649667 0.4 0.1 0.0001 256 1024 50000
7 ethereal-sweep-21 dqn 0.1 no_robust 398492.807245857 343580.6802427996 398492.807245857 136.67160732585188 0.1 0.2 0.001 512 2048 50000
8 dark-sweep-418 linear 0.1 no_robust 394615.3720658343 339749.76272695075 394615.3720658343 134.39233246711 0.2 0.1 0.0003 256 1024 50000
9 wandering-sweep-122 dqn 0.0 robust 394061.3617726404 339512.43434806296 394061.3617726404 137.6864755964331 0.1 0.3 0.0001 256 2048 30000
10 laced-sweep-132 dqn 0.1 robust 389274.54998495104 335600.5979215904 389274.54998495104 137.36888574027677 0.4 0.2 0.001 256 2048 30000
11 rich-sweep-53 qtable 0.0 robust 388601.2626147048 335630.6853337664 388601.2626147048 133.4414069888203 0.2 0.1 0.0001 512 1024 50000
12 faithful-sweep-430 qtable 0.25 no_robust 387035.6970938766 333255.5771210341 387035.6970938766 137.4906091183188 0.1 0.2 0.0003 128 1024 15000
13 dark-sweep-280 qtable 0.25 no_robust 386318.8845004527 332220.0316564078 386318.8845004527 137.26992450099925 0.4 0.1 0.0001 256 1024 50000
14 chocolate-sweep-383 linear 0.25 no_robust 383989.49015403807 331071.7003244704 383989.49015403807 134.60590742050857 0.1 0.2 0.001 512 1024 30000
15 dry-sweep-263 dqn 0.0 robust 383372.6880637367 330436.0312615148 383372.6880637367 137.40558130223476 0.1 0.3 0.001 128 1024 50000
16 different-sweep-143 qtable 0.0 robust 383278.4198015018 330546.16800945485 383278.4198015018 135.9021538079678 0.1 0.3 0.001 256 2048 30000
17 woven-sweep-139 dqn 0.25 robust 382788.1296637251 329427.735752473 382788.1296637251 136.8968339394894 0.1 0.1 0.001 512 1024 15000
18 dark-sweep-215 dqn 0.25 robust 382358.2401374872 329330.0097603144 382358.2401374872 137.64528612332785 0.2 0.1 0.0001 512 4096 30000
19 charmed-sweep-136 linear 0.25 no_robust 382249.5728044314 329646.2053260979 382249.5728044314 134.46825608007862 0.4 0.1 0.0001 256 2048 15000
20 light-sweep-308 linear 0.0 robust 381939.1275250679 329628.9436641051 381939.1275250679 133.6209821974879 0.2 0.2 0.001 128 4096 30000
21 treasured-sweep-325 linear 0.25 robust 381322.0104772589 328353.58675398555 381322.0104772589 134.8950293943581 0.1 0.1 0.0001 512 2048 15000
22 fine-sweep-202 dqn 0.25 robust 378751.33572275366 326518.9068184018 378751.33572275366 137.2900973301052 0.1 0.2 0.0001 512 2048 30000
23 treasured-sweep-380 linear 0.25 no_robust 377898.0979419424 325869.1953595453 377898.0979419424 134.54118723889738 0.4 0.3 0.001 128 1024 50000
24 pretty-sweep-49 qtable 0.25 robust 377318.4766808995 325282.0152823859 377318.4766808995 137.19609012644068 0.4 0.1 0.0001 128 4096 50000
25 desert-sweep-253 linear 0.25 robust 376808.6335063269 325146.3478714648 376808.6335063269 134.48396340732663 0.2 0.1 0.0003 256 1024 30000
26 jolly-sweep-133 qtable 0.4 no_robust 376419.57394710975 323709.24588324485 376419.57394710975 137.8349363778071 0.1 0.3 0.0001 128 2048 50000

View File

@@ -1,7 +0,0 @@
alpha,runs_robust,runs_no_robust,eval_revenue_mean_robust,eval_revenue_mean_no_robust,eval_revenue_mean_delta,eval_revenue_mean_delta_pct,eval_reward_mean_robust,eval_reward_mean_no_robust,eval_reward_mean_delta,eval_reward_mean_delta_pct,eval_coi_level_mean_robust,eval_coi_level_mean_no_robust,eval_coi_level_mean_delta,eval_coi_level_mean_delta_pct,eval_coi_leakage_mean_robust,eval_coi_leakage_mean_no_robust,eval_coi_leakage_mean_delta,eval_coi_leakage_mean_delta_pct,eval_volatility_mean_robust,eval_volatility_mean_no_robust,eval_volatility_mean_delta,eval_volatility_mean_delta_pct,eval_margin_mean_robust,eval_margin_mean_no_robust,eval_margin_mean_delta,eval_margin_mean_delta_pct,train_alpha_adv_robust,train_alpha_adv_no_robust,train_alpha_adv_delta,train_alpha_adv_delta_pct,train_coi_penalty_robust,train_coi_penalty_no_robust,train_coi_penalty_delta,train_coi_penalty_delta_pct,train_ux_penalty_robust,train_ux_penalty_no_robust,train_ux_penalty_delta,train_ux_penalty_delta_pct,train_agent_prob_robust,train_agent_prob_no_robust,train_agent_prob_delta,train_agent_prob_delta_pct
0.0,4.0,4.0,3379.9042994670963,3565.2912010160844,-185.38690154898813,-5.199768857482219,313527.4707462,331300.229069,-17772.758322799986,-5.364547550342456,137.08358925982625,137.28764358955686,-0.2040543297306101,-0.14863269875959326,0.1146626165658294,0.11861133504329742,-0.003948718477468013,-3.3291240470622716,0.06687153537785637,0.06445662162531288,0.0024149137525434905,3.746572022625408,0.9315273502623671,0.9317078361627993,-0.00018048590043218127,-0.019371512552207898,0.18958333333333333,,,,5.553200113221484,,,,61.35134238638615,66.58479574844135,-5.233453362055201,-7.859832418540847,0.12778212146468534,0.11615891320235115,0.011623208262334192,10.00629907933654
0.1,4.0,4.0,3307.028238366196,3458.002436284769,-150.97419791857283,-4.365936713473732,306772.49146475,321215.477968,-14442.986503249966,-4.4963544704059375,137.1182041122497,136.82757579763506,0.29062831461465066,0.21240478238427865,0.1128546052304944,0.11704917861668755,-0.004194573386193154,-3.5835991638433753,0.0685405649303561,0.06737596899527175,0.0011645959350843477,1.728503430007924,0.9315331673960889,0.9313276818191593,0.00020548557692967595,0.0220637248243606,0.2818749999999999,0.1,0.18187499999999987,181.87499999999986,5.079528726095333,,,,52.44772950699336,53.288869747139515,-0.841140240146153,-1.578453895039319,0.11644381911386253,0.11765277436070229,-0.0012089552468397546,-1.0275620387270383
0.25,4.0,4.0,3134.3438215278165,3300.5539051855053,-166.21008365768876,-5.035823938416998,290691.4771835,306522.90003785,-15831.422854350007,-5.16484179563586,136.89990884669214,136.71752459667877,0.18238425001337077,0.1334022471160229,0.11113957413522965,0.1139905600539111,-0.0028509859186814507,-2.50107194607439,0.06427159998376095,0.06846858821082077,-0.004196988227059828,-6.12980103246314,0.9314501501825461,0.9313053225630614,0.0001448276194846443,0.015551035302371268,0.44833333333333336,0.25,0.19833333333333336,79.33333333333334,4.7183804755060255,,,,49.04307009982127,55.2030005738411,-6.159930474019831,-11.158687770568074,0.10998505830218755,0.11684259343269415,-0.0068575351305066035,-5.869037077182653
0.4,4.0,4.0,2983.852437569374,3180.7872854626567,-196.9348478932825,-6.191386918369099,276545.26309355,295433.5405797,-18888.277486150037,-6.393409986248494,136.19210761854086,136.5783021470118,-0.38619452847095204,-0.2827641890402586,0.10875560547061063,0.11189234314151972,-0.0031367376709090927,-2.8033532794480807,0.07452230347799255,0.07104688223410768,0.003475421243884863,4.891729425132195,0.9307282962514367,0.9310542820602117,-0.0003259858087749645,-0.03501254599824534,0.5999999999999999,0.4000000000000001,0.1999999999999998,49.999999999999936,4.174996403604185,,,,47.99794119802058,50.794260008988424,-2.796318810967847,-5.505186630286606,0.10222958892923095,0.11161526349272373,-0.009385674563492777,-8.408952565976458
0.6,4.0,4.0,2789.0434220430398,2982.2460998252786,-193.20267778223888,-6.4784283830083,258688.11700405,277051.95613675,-18363.8391327,-6.628301560749781,136.86774320500828,136.81931587629953,0.04842732870875466,0.035395096371142916,0.10501047827147733,0.10802266412956946,-0.0030121858580921257,-2.788475809557069,0.06914180963767007,0.06698591531512615,0.0021558943225439137,3.2184292957732996,0.9314130089130337,0.9313849217310588,2.8087181974889575e-05,0.003015636319588161,0.7733333333333334,0.5999999999999999,0.17333333333333356,28.888888888888935,4.178300996512875,,,,39.928062615509425,47.86860429278531,-7.940541677275881,-16.588203885594947,0.11297979438696983,0.1162670925925253,-0.0032872982055554695,-2.827367686122743
0.8,4.0,4.0,2586.098242115281,2841.1305915063504,-255.03234939106915,-8.97643882169642,239765.24959855,264140.55002745,-24375.300428900024,-9.228155399224729,136.5038826686135,137.28163778418497,-0.7777551155714661,-0.5665397995864124,0.10253056902792507,0.1031498585902154,-0.0006192895622903344,-0.6003784888844036,0.07325665736408164,0.06592454978099352,0.007332107583088124,11.1219683827132,0.9311235469993302,0.9316596013994161,-0.0005360544000858614,-0.05753758124541101,1.0,0.8000000000000002,0.19999999999999984,24.99999999999998,3.5384100686094007,,,,37.14414699970415,37.43809775029793,-0.29395075059377973,-0.7851647606519765,0.09990322635678014,0.10432800196112454,-0.0044247756043444,-4.241215705437541
1 alpha runs_robust runs_no_robust eval_revenue_mean_robust eval_revenue_mean_no_robust eval_revenue_mean_delta eval_revenue_mean_delta_pct eval_reward_mean_robust eval_reward_mean_no_robust eval_reward_mean_delta eval_reward_mean_delta_pct eval_coi_level_mean_robust eval_coi_level_mean_no_robust eval_coi_level_mean_delta eval_coi_level_mean_delta_pct eval_coi_leakage_mean_robust eval_coi_leakage_mean_no_robust eval_coi_leakage_mean_delta eval_coi_leakage_mean_delta_pct eval_volatility_mean_robust eval_volatility_mean_no_robust eval_volatility_mean_delta eval_volatility_mean_delta_pct eval_margin_mean_robust eval_margin_mean_no_robust eval_margin_mean_delta eval_margin_mean_delta_pct train_alpha_adv_robust train_alpha_adv_no_robust train_alpha_adv_delta train_alpha_adv_delta_pct train_coi_penalty_robust train_coi_penalty_no_robust train_coi_penalty_delta train_coi_penalty_delta_pct train_ux_penalty_robust train_ux_penalty_no_robust train_ux_penalty_delta train_ux_penalty_delta_pct train_agent_prob_robust train_agent_prob_no_robust train_agent_prob_delta train_agent_prob_delta_pct
2 0.0 4.0 4.0 3379.9042994670963 3565.2912010160844 -185.38690154898813 -5.199768857482219 313527.4707462 331300.229069 -17772.758322799986 -5.364547550342456 137.08358925982625 137.28764358955686 -0.2040543297306101 -0.14863269875959326 0.1146626165658294 0.11861133504329742 -0.003948718477468013 -3.3291240470622716 0.06687153537785637 0.06445662162531288 0.0024149137525434905 3.746572022625408 0.9315273502623671 0.9317078361627993 -0.00018048590043218127 -0.019371512552207898 0.18958333333333333 5.553200113221484 61.35134238638615 66.58479574844135 -5.233453362055201 -7.859832418540847 0.12778212146468534 0.11615891320235115 0.011623208262334192 10.00629907933654
3 0.1 4.0 4.0 3307.028238366196 3458.002436284769 -150.97419791857283 -4.365936713473732 306772.49146475 321215.477968 -14442.986503249966 -4.4963544704059375 137.1182041122497 136.82757579763506 0.29062831461465066 0.21240478238427865 0.1128546052304944 0.11704917861668755 -0.004194573386193154 -3.5835991638433753 0.0685405649303561 0.06737596899527175 0.0011645959350843477 1.728503430007924 0.9315331673960889 0.9313276818191593 0.00020548557692967595 0.0220637248243606 0.2818749999999999 0.1 0.18187499999999987 181.87499999999986 5.079528726095333 52.44772950699336 53.288869747139515 -0.841140240146153 -1.578453895039319 0.11644381911386253 0.11765277436070229 -0.0012089552468397546 -1.0275620387270383
4 0.25 4.0 4.0 3134.3438215278165 3300.5539051855053 -166.21008365768876 -5.035823938416998 290691.4771835 306522.90003785 -15831.422854350007 -5.16484179563586 136.89990884669214 136.71752459667877 0.18238425001337077 0.1334022471160229 0.11113957413522965 0.1139905600539111 -0.0028509859186814507 -2.50107194607439 0.06427159998376095 0.06846858821082077 -0.004196988227059828 -6.12980103246314 0.9314501501825461 0.9313053225630614 0.0001448276194846443 0.015551035302371268 0.44833333333333336 0.25 0.19833333333333336 79.33333333333334 4.7183804755060255 49.04307009982127 55.2030005738411 -6.159930474019831 -11.158687770568074 0.10998505830218755 0.11684259343269415 -0.0068575351305066035 -5.869037077182653
5 0.4 4.0 4.0 2983.852437569374 3180.7872854626567 -196.9348478932825 -6.191386918369099 276545.26309355 295433.5405797 -18888.277486150037 -6.393409986248494 136.19210761854086 136.5783021470118 -0.38619452847095204 -0.2827641890402586 0.10875560547061063 0.11189234314151972 -0.0031367376709090927 -2.8033532794480807 0.07452230347799255 0.07104688223410768 0.003475421243884863 4.891729425132195 0.9307282962514367 0.9310542820602117 -0.0003259858087749645 -0.03501254599824534 0.5999999999999999 0.4000000000000001 0.1999999999999998 49.999999999999936 4.174996403604185 47.99794119802058 50.794260008988424 -2.796318810967847 -5.505186630286606 0.10222958892923095 0.11161526349272373 -0.009385674563492777 -8.408952565976458
6 0.6 4.0 4.0 2789.0434220430398 2982.2460998252786 -193.20267778223888 -6.4784283830083 258688.11700405 277051.95613675 -18363.8391327 -6.628301560749781 136.86774320500828 136.81931587629953 0.04842732870875466 0.035395096371142916 0.10501047827147733 0.10802266412956946 -0.0030121858580921257 -2.788475809557069 0.06914180963767007 0.06698591531512615 0.0021558943225439137 3.2184292957732996 0.9314130089130337 0.9313849217310588 2.8087181974889575e-05 0.003015636319588161 0.7733333333333334 0.5999999999999999 0.17333333333333356 28.888888888888935 4.178300996512875 39.928062615509425 47.86860429278531 -7.940541677275881 -16.588203885594947 0.11297979438696983 0.1162670925925253 -0.0032872982055554695 -2.827367686122743
7 0.8 4.0 4.0 2586.098242115281 2841.1305915063504 -255.03234939106915 -8.97643882169642 239765.24959855 264140.55002745 -24375.300428900024 -9.228155399224729 136.5038826686135 137.28163778418497 -0.7777551155714661 -0.5665397995864124 0.10253056902792507 0.1031498585902154 -0.0006192895622903344 -0.6003784888844036 0.07325665736408164 0.06592454978099352 0.007332107583088124 11.1219683827132 0.9311235469993302 0.9316596013994161 -0.0005360544000858614 -0.05753758124541101 1.0 0.8000000000000002 0.19999999999999984 24.99999999999998 3.5384100686094007 37.14414699970415 37.43809775029793 -0.29395075059377973 -0.7851647606519765 0.09990322635678014 0.10432800196112454 -0.0044247756043444 -4.241215705437541

View File

@@ -1,13 +0,0 @@
alpha,mode,runs,eval_revenue_mean_mean,eval_revenue_mean_std,eval_reward_mean_mean,eval_reward_mean_std,eval_coi_level_mean_mean,eval_coi_level_mean_std,eval_coi_leakage_mean_mean,eval_coi_leakage_mean_std,eval_volatility_mean_mean,eval_volatility_mean_std,eval_margin_mean_mean,eval_margin_mean_std,train_alpha_adv_mean,train_alpha_adv_std,train_coi_penalty_mean,train_coi_penalty_std,train_ux_penalty_mean,train_ux_penalty_std,train_agent_prob_mean,train_agent_prob_std
0.0,no_robust,4,3565.2912010160844,52.219179508209216,331300.229069,5038.96659004527,137.28764358955686,0.6434240315013728,0.11861133504329742,0.004019332768284657,0.06445662162531288,0.004080405219050139,0.9317078361627993,0.00038018051704976865,,,,,66.58479574844135,32.282270089830455,0.11615891320235115,0.016558627227281013
0.0,robust,4,3379.9042994670963,54.727408939657735,313527.4707462,5408.058196552377,137.08358925982625,1.047386315387148,0.1146626165658294,0.0025627354157035497,0.06687153537785637,0.008577061675868377,0.9315273502623671,0.0007274203134899985,0.18958333333333333,0.02083333333333336,5.553200113221484,0.45981481828856186,61.35134238638615,30.27964905193963,0.12778212146468534,0.027929667978205217
0.1,no_robust,4,3458.002436284769,60.75923217871363,321215.477968,6016.373193216596,136.82757579763506,1.1899102161551907,0.11704917861668755,0.0021220259908233973,0.06737596899527175,0.006801136773079149,0.9313276818191593,0.0008352263172197586,0.1,0.0,,,53.288869747139515,18.480340945815023,0.11765277436070229,0.017544197575138736
0.1,robust,4,3307.028238366196,35.58495715224888,306772.49146475,3488.2690530060245,137.1182041122497,0.8582218376452346,0.1128546052304944,0.0005963155492967403,0.0685405649303561,0.0050673362512629015,0.9315331673960889,0.0005217376436765336,0.2818749999999999,0.03624999999999999,5.079528726095333,0.6109585102054891,52.44772950699336,29.0263361696475,0.11644381911386253,0.021152545180088765
0.25,no_robust,4,3300.5539051855053,50.460978662647115,306522.90003785,4860.668937531515,136.71752459667877,0.7410676951244369,0.1139905600539111,0.003319948537321803,0.06846858821082077,0.008614994548315848,0.9313053225630614,0.0004919872662680591,0.25,0.0,,,55.2030005738411,26.88247558235345,0.11684259343269415,0.013462146346772591
0.25,robust,4,3134.3438215278165,64.06834403659167,290691.4771835,6331.196493752059,136.89990884669214,1.3796663751798552,0.11113957413522965,0.0015044942041406348,0.06427159998376095,0.0042331619171274894,0.9314501501825461,0.0008939739741734515,0.44833333333333336,0.0033333333333333518,4.7183804755060255,0.4538389380858333,49.04307009982127,28.20484665432831,0.10998505830218755,0.010731404693185651
0.4,no_robust,4,3180.7872854626567,71.87564776824694,295433.5405797,7035.374110540269,136.5783021470118,1.7095219574599192,0.11189234314151972,0.0013821115134030936,0.07104688223410768,0.005766138692685495,0.9310542820602117,0.0013989725050689828,0.4000000000000001,0.0,,,50.794260008988424,24.836708377642946,0.11161526349272373,0.005787749200301594
0.4,robust,4,2983.852437569374,45.51290575912758,276545.26309355,4555.1725323898245,136.19210761854086,1.5546063667946701,0.10875560547061063,0.001118798290958954,0.07452230347799255,0.0040446395928049874,0.9307282962514367,0.0013558080014763189,0.5999999999999999,0.0,4.174996403604185,0.12189448324552496,47.99794119802058,33.51782503281748,0.10222958892923095,0.0031686467591609474
0.6,no_robust,4,2982.2460998252786,39.93674476199945,277051.95613675,3931.02017169463,136.81931587629953,1.1995405806950865,0.10802266412956946,0.000405835985606262,0.06698591531512615,0.002805894772223563,0.9313849217310588,0.0008100530228792662,0.5999999999999999,0.0,,,47.86860429278531,23.830502772642472,0.1162670925925253,0.028676813474186293
0.6,robust,4,2789.0434220430398,35.297482315631626,258688.11700405,3420.6735023624556,136.86774320500828,0.7097303238857778,0.10501047827147733,0.0008273121554488608,0.06914180963767007,0.009066158371268139,0.9314130089130337,0.0005024421703994162,0.7733333333333334,0.053333333333333385,4.178300996512875,0.5865970573865015,39.928062615509425,30.25078643153115,0.11297979438696983,0.0274101056520461
0.8,no_robust,4,2841.1305915063504,21.84043179776092,264140.55002745,2073.353315114627,137.28163778418497,0.6288968799501957,0.1031498585902154,0.0012877581835795701,0.06592454978099352,0.00340700896766341,0.9316596013994161,0.00038430108058413553,0.8000000000000002,0.0,,,37.43809775029793,32.01740090550489,0.10432800196112454,0.018337841526911584
0.8,robust,4,2586.098242115281,48.05539265296157,239765.24959855,4681.6472175597555,136.5038826686135,1.0611320896043694,0.10253056902792507,0.002587472569909977,0.07325665736408164,0.0015359324114246234,0.9311235469993302,0.0006145440308596868,1.0,0.0,3.5384100686094007,0.391972726035734,37.14414699970415,25.614063825315505,0.09990322635678014,0.010269342031085898
1 alpha mode runs eval_revenue_mean_mean eval_revenue_mean_std eval_reward_mean_mean eval_reward_mean_std eval_coi_level_mean_mean eval_coi_level_mean_std eval_coi_leakage_mean_mean eval_coi_leakage_mean_std eval_volatility_mean_mean eval_volatility_mean_std eval_margin_mean_mean eval_margin_mean_std train_alpha_adv_mean train_alpha_adv_std train_coi_penalty_mean train_coi_penalty_std train_ux_penalty_mean train_ux_penalty_std train_agent_prob_mean train_agent_prob_std
2 0.0 no_robust 4 3565.2912010160844 52.219179508209216 331300.229069 5038.96659004527 137.28764358955686 0.6434240315013728 0.11861133504329742 0.004019332768284657 0.06445662162531288 0.004080405219050139 0.9317078361627993 0.00038018051704976865 66.58479574844135 32.282270089830455 0.11615891320235115 0.016558627227281013
3 0.0 robust 4 3379.9042994670963 54.727408939657735 313527.4707462 5408.058196552377 137.08358925982625 1.047386315387148 0.1146626165658294 0.0025627354157035497 0.06687153537785637 0.008577061675868377 0.9315273502623671 0.0007274203134899985 0.18958333333333333 0.02083333333333336 5.553200113221484 0.45981481828856186 61.35134238638615 30.27964905193963 0.12778212146468534 0.027929667978205217
4 0.1 no_robust 4 3458.002436284769 60.75923217871363 321215.477968 6016.373193216596 136.82757579763506 1.1899102161551907 0.11704917861668755 0.0021220259908233973 0.06737596899527175 0.006801136773079149 0.9313276818191593 0.0008352263172197586 0.1 0.0 53.288869747139515 18.480340945815023 0.11765277436070229 0.017544197575138736
5 0.1 robust 4 3307.028238366196 35.58495715224888 306772.49146475 3488.2690530060245 137.1182041122497 0.8582218376452346 0.1128546052304944 0.0005963155492967403 0.0685405649303561 0.0050673362512629015 0.9315331673960889 0.0005217376436765336 0.2818749999999999 0.03624999999999999 5.079528726095333 0.6109585102054891 52.44772950699336 29.0263361696475 0.11644381911386253 0.021152545180088765
6 0.25 no_robust 4 3300.5539051855053 50.460978662647115 306522.90003785 4860.668937531515 136.71752459667877 0.7410676951244369 0.1139905600539111 0.003319948537321803 0.06846858821082077 0.008614994548315848 0.9313053225630614 0.0004919872662680591 0.25 0.0 55.2030005738411 26.88247558235345 0.11684259343269415 0.013462146346772591
7 0.25 robust 4 3134.3438215278165 64.06834403659167 290691.4771835 6331.196493752059 136.89990884669214 1.3796663751798552 0.11113957413522965 0.0015044942041406348 0.06427159998376095 0.0042331619171274894 0.9314501501825461 0.0008939739741734515 0.44833333333333336 0.0033333333333333518 4.7183804755060255 0.4538389380858333 49.04307009982127 28.20484665432831 0.10998505830218755 0.010731404693185651
8 0.4 no_robust 4 3180.7872854626567 71.87564776824694 295433.5405797 7035.374110540269 136.5783021470118 1.7095219574599192 0.11189234314151972 0.0013821115134030936 0.07104688223410768 0.005766138692685495 0.9310542820602117 0.0013989725050689828 0.4000000000000001 0.0 50.794260008988424 24.836708377642946 0.11161526349272373 0.005787749200301594
9 0.4 robust 4 2983.852437569374 45.51290575912758 276545.26309355 4555.1725323898245 136.19210761854086 1.5546063667946701 0.10875560547061063 0.001118798290958954 0.07452230347799255 0.0040446395928049874 0.9307282962514367 0.0013558080014763189 0.5999999999999999 0.0 4.174996403604185 0.12189448324552496 47.99794119802058 33.51782503281748 0.10222958892923095 0.0031686467591609474
10 0.6 no_robust 4 2982.2460998252786 39.93674476199945 277051.95613675 3931.02017169463 136.81931587629953 1.1995405806950865 0.10802266412956946 0.000405835985606262 0.06698591531512615 0.002805894772223563 0.9313849217310588 0.0008100530228792662 0.5999999999999999 0.0 47.86860429278531 23.830502772642472 0.1162670925925253 0.028676813474186293
11 0.6 robust 4 2789.0434220430398 35.297482315631626 258688.11700405 3420.6735023624556 136.86774320500828 0.7097303238857778 0.10501047827147733 0.0008273121554488608 0.06914180963767007 0.009066158371268139 0.9314130089130337 0.0005024421703994162 0.7733333333333334 0.053333333333333385 4.178300996512875 0.5865970573865015 39.928062615509425 30.25078643153115 0.11297979438696983 0.0274101056520461
12 0.8 no_robust 4 2841.1305915063504 21.84043179776092 264140.55002745 2073.353315114627 137.28163778418497 0.6288968799501957 0.1031498585902154 0.0012877581835795701 0.06592454978099352 0.00340700896766341 0.9316596013994161 0.00038430108058413553 0.8000000000000002 0.0 37.43809775029793 32.01740090550489 0.10432800196112454 0.018337841526911584
13 0.8 robust 4 2586.098242115281 48.05539265296157 239765.24959855 4681.6472175597555 136.5038826686135 1.0611320896043694 0.10253056902792507 0.002587472569909977 0.07325665736408164 0.0015359324114246234 0.9311235469993302 0.0006145440308596868 1.0 0.0 3.5384100686094007 0.391972726035734 37.14414699970415 25.614063825315505 0.09990322635678014 0.010269342031085898

View File

@@ -1,7 +0,0 @@
{
"status": "ok",
"revenue_delta": -191.29017636530716,
"revenue_delta_pct": -5.938226273545598,
"coi_leakage_delta": -0.002960415145605702,
"coi_leakage_delta_pct": -2.6404147469510946
}

View File

@@ -1,3 +0,0 @@
mode,runs,eval_revenue_mean_mean,eval_revenue_mean_std,eval_reward_mean_mean,eval_reward_mean_std,eval_coi_level_mean_mean,eval_coi_level_mean_std,eval_coi_leakage_mean_mean,eval_coi_leakage_mean_std,eval_volatility_mean_mean,eval_volatility_mean_std,eval_margin_mean_mean,eval_margin_mean_std,train_alpha_adv_mean,train_alpha_adv_std,train_coi_penalty_mean,train_coi_penalty_std,train_ux_penalty_mean,train_ux_penalty_std,train_agent_prob_mean,train_agent_prob_std
no_robust,24,3221.335253213441,262.46595166337727,299277.442303125,24382.561944761477,136.9186666318945,1.0038463876967063,0.11211932326253345,0.005805494533542669,0.06737642102693879,0.005402738047823369,0.9314066076226178,0.0007436370959663933,0.43,0.2546411303445653,,,51.86293802024894,25.340287421525442,0.11381077317368686,0.016664235359362907
robust,24,3030.0450768481337,288.262657026656,280998.34484843333,26820.020161880373,136.77757261848845,1.06224696086916,0.10915890811692774,0.004616462637659704,0.06943407846195294,0.006435789449278624,0.9312959200008004,0.0007858424519830652,0.5488541666666666,0.2860373751485706,4.540469463924883,0.7906156355346259,47.985382134405825,27.407657819442747,0.11155393475895271,0.01943348418653492
1 mode runs eval_revenue_mean_mean eval_revenue_mean_std eval_reward_mean_mean eval_reward_mean_std eval_coi_level_mean_mean eval_coi_level_mean_std eval_coi_leakage_mean_mean eval_coi_leakage_mean_std eval_volatility_mean_mean eval_volatility_mean_std eval_margin_mean_mean eval_margin_mean_std train_alpha_adv_mean train_alpha_adv_std train_coi_penalty_mean train_coi_penalty_std train_ux_penalty_mean train_ux_penalty_std train_agent_prob_mean train_agent_prob_std
2 no_robust 24 3221.335253213441 262.46595166337727 299277.442303125 24382.561944761477 136.9186666318945 1.0038463876967063 0.11211932326253345 0.005805494533542669 0.06737642102693879 0.005402738047823369 0.9314066076226178 0.0007436370959663933 0.43 0.2546411303445653 51.86293802024894 25.340287421525442 0.11381077317368686 0.016664235359362907
3 robust 24 3030.0450768481337 288.262657026656 280998.34484843333 26820.020161880373 136.77757261848845 1.06224696086916 0.10915890811692774 0.004616462637659704 0.06943407846195294 0.006435789449278624 0.9312959200008004 0.0007858424519830652 0.5488541666666666 0.2860373751485706 4.540469463924883 0.7906156355346259 47.985382134405825 27.407657819442747 0.11155393475895271 0.01943348418653492

View File

@@ -1,25 +0,0 @@
alpha,metric,direction,wins,ties,total_pairs,win_probability
0.0,eval/revenue_mean,higher,0,0,16,0.0
0.0,eval/reward_mean,higher,0,0,16,0.0
0.0,eval/coi_leakage_mean,lower,14,0,16,0.875
0.0,eval/volatility_mean,lower,8,0,16,0.5
0.1,eval/revenue_mean,higher,0,0,16,0.0
0.1,eval/reward_mean,higher,0,0,16,0.0
0.1,eval/coi_leakage_mean,lower,16,0,16,1.0
0.1,eval/volatility_mean,lower,8,0,16,0.5
0.25,eval/revenue_mean,higher,0,0,16,0.0
0.25,eval/reward_mean,higher,0,0,16,0.0
0.25,eval/coi_leakage_mean,lower,12,0,16,0.75
0.25,eval/volatility_mean,lower,11,0,16,0.6875
0.4,eval/revenue_mean,higher,0,0,16,0.0
0.4,eval/reward_mean,higher,0,0,16,0.0
0.4,eval/coi_leakage_mean,lower,16,0,16,1.0
0.4,eval/volatility_mean,lower,6,0,16,0.375
0.6,eval/revenue_mean,higher,0,0,16,0.0
0.6,eval/reward_mean,higher,0,0,16,0.0
0.6,eval/coi_leakage_mean,lower,16,0,16,1.0
0.6,eval/volatility_mean,lower,7,0,16,0.4375
0.8,eval/revenue_mean,higher,0,0,16,0.0
0.8,eval/reward_mean,higher,0,0,16,0.0
0.8,eval/coi_leakage_mean,lower,11,0,16,0.6875
0.8,eval/volatility_mean,lower,0,0,16,0.0
1 alpha metric direction wins ties total_pairs win_probability
2 0.0 eval/revenue_mean higher 0 0 16 0.0
3 0.0 eval/reward_mean higher 0 0 16 0.0
4 0.0 eval/coi_leakage_mean lower 14 0 16 0.875
5 0.0 eval/volatility_mean lower 8 0 16 0.5
6 0.1 eval/revenue_mean higher 0 0 16 0.0
7 0.1 eval/reward_mean higher 0 0 16 0.0
8 0.1 eval/coi_leakage_mean lower 16 0 16 1.0
9 0.1 eval/volatility_mean lower 8 0 16 0.5
10 0.25 eval/revenue_mean higher 0 0 16 0.0
11 0.25 eval/reward_mean higher 0 0 16 0.0
12 0.25 eval/coi_leakage_mean lower 12 0 16 0.75
13 0.25 eval/volatility_mean lower 11 0 16 0.6875
14 0.4 eval/revenue_mean higher 0 0 16 0.0
15 0.4 eval/reward_mean higher 0 0 16 0.0
16 0.4 eval/coi_leakage_mean lower 16 0 16 1.0
17 0.4 eval/volatility_mean lower 6 0 16 0.375
18 0.6 eval/revenue_mean higher 0 0 16 0.0
19 0.6 eval/reward_mean higher 0 0 16 0.0
20 0.6 eval/coi_leakage_mean lower 16 0 16 1.0
21 0.6 eval/volatility_mean lower 7 0 16 0.4375
22 0.8 eval/revenue_mean higher 0 0 16 0.0
23 0.8 eval/reward_mean higher 0 0 16 0.0
24 0.8 eval/coi_leakage_mean lower 11 0 16 0.6875
25 0.8 eval/volatility_mean lower 0 0 16 0.0

View File

@@ -1 +0,0 @@
\includegraphics[width=0.99\linewidth]{chapters/figures/results/generated/legacy/plots/first_sweep_tier_revenue.pdf}

View File

@@ -1 +0,0 @@
\includegraphics[width=0.98\linewidth]{chapters/figures/results/generated/legacy/plots/ppo_alpha_curves.pdf}

View File

@@ -1 +0,0 @@
\includegraphics[width=0.98\linewidth]{chapters/figures/results/generated/legacy/plots/ppo_delta_curves.pdf}

View File

@@ -1 +0,0 @@
\includegraphics[width=0.88\linewidth]{chapters/figures/results/generated/legacy/plots/ppo_tradeoff_scatter.pdf}

View File

@@ -1,313 +0,0 @@
from __future__ import annotations
import argparse
from pathlib import Path
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
import numpy as np
import pandas as pd
from process_first_sweep import run as run_first_sweep
from process_ppo_benchmark import run as run_ppo_benchmark
def _output_dir() -> Path:
return Path(__file__).resolve().parent / "generated" / "legacy"
def _plot_dir() -> Path:
    """Return the ``plots`` sub-directory of the default output directory."""
    base_dir = _output_dir()
    return base_dir.joinpath("plots")
def _configure_style() -> None:
    """Install the matplotlib rcParams shared by every figure of this script."""
    overrides = {
        # Typography
        "font.family": "serif",
        "font.size": 10,
        "axes.titlesize": 10,
        "axes.labelsize": 9,
        "legend.fontsize": 8,
        "xtick.labelsize": 8,
        "ytick.labelsize": 8,
        # Resolution (on-screen vs. saved files)
        "figure.dpi": 220,
        "savefig.dpi": 320,
        # Axes decoration: open frame with a faint grid
        "axes.spines.top": False,
        "axes.spines.right": False,
        "axes.grid": True,
        "grid.alpha": 0.22,
    }
    plt.rcParams.update(overrides)
def _fmt_thousands(value: float, _: int) -> str:
return f"{int(value):,}"
def _load_csv(path: Path) -> pd.DataFrame:
if not path.exists():
raise FileNotFoundError(f"Missing required input: {path}")
return pd.read_csv(path)
def _plot_ppo_alpha_curves(alpha_mode: pd.DataFrame, out_dir: Path) -> Path:
    """Plot per-mode metric curves over alpha on a 2x2 grid and save a PDF.

    Args:
        alpha_mode: Table with ``alpha`` and ``mode`` columns plus a
            ``<metric>_mean`` column (and optionally ``<metric>_std``) for each
            of the four plotted metrics.
        out_dir: Directory that receives ``ppo_alpha_curves.pdf``.

    Returns:
        Path of the written PDF.
    """
    fig, axes = plt.subplots(2, 2, figsize=(9.3, 6.4), constrained_layout=True)

    # (mode key, legend label, colour) in drawing order: baseline first.
    series_specs = (
        ("no_robust", "Non-robust", "#4C72B0"),
        ("robust", "Robust", "#C44E52"),
    )
    # (column prefix, panel title, y-axis label), one entry per subplot.
    panel_specs = (
        ("eval_revenue_mean", "Mean Episode Revenue", "Revenue"),
        ("eval_reward_mean", "Mean Episode Reward", "Reward"),
        ("eval_coi_leakage_mean", "Mean COI Leakage", "COI Leakage"),
        ("eval_volatility_mean", "Mean Price Volatility", "Volatility"),
    )
    thousands_panels = {"eval_revenue_mean", "eval_reward_mean"}

    for panel, (prefix, heading, y_label) in zip(axes.flat, panel_specs):
        mean_name = prefix + "_mean"
        std_name = prefix + "_std"
        for mode_key, legend_text, colour in series_specs:
            rows = alpha_mode[alpha_mode["mode"] == mode_key].sort_values("alpha")
            if rows.empty:
                continue
            xs = rows["alpha"].to_numpy(dtype=float)
            ys = rows[mean_name].to_numpy(dtype=float)
            panel.plot(
                xs,
                ys,
                marker="o",
                linewidth=1.8,
                markersize=4,
                color=colour,
                label=legend_text,
            )
            if std_name not in rows.columns:
                continue
            # Shade a +/- one-std band; a missing std is drawn as zero width.
            spread = rows[std_name].fillna(0.0).to_numpy(dtype=float)
            panel.fill_between(
                xs,
                ys - spread,
                ys + spread,
                color=colour,
                alpha=0.14,
                linewidth=0,
            )
        panel.set_title(heading)
        panel.set_xlabel(r"Contamination $\alpha$")
        panel.set_ylabel(y_label)
        panel.set_xticks(sorted(alpha_mode["alpha"].unique()))
        if prefix in thousands_panels:
            panel.yaxis.set_major_formatter(FuncFormatter(_fmt_thousands))

    # One shared legend, taken from the first panel's handles.
    legend_handles, legend_labels = axes.flat[0].get_legend_handles_labels()
    fig.legend(
        legend_handles,
        legend_labels,
        ncol=2,
        loc="upper center",
        bbox_to_anchor=(0.5, 1.02),
    )
    out_path = out_dir / "ppo_alpha_curves.pdf"
    fig.savefig(out_path, bbox_inches="tight")
    plt.close(fig)
    return out_path
def _plot_ppo_delta_curves(deltas: pd.DataFrame, out_dir: Path) -> Path:
    """Plot robust-minus-baseline percentage deltas over alpha in two panels.

    The top panel shows revenue and reward deltas, the bottom panel COI
    leakage and volatility deltas.

    Args:
        deltas: Table with an ``alpha`` column and the four
            ``*_delta_pct`` columns plotted below.
        out_dir: Directory that receives ``ppo_delta_curves.pdf``.

    Returns:
        Path of the written PDF.
    """
    fig, axes = plt.subplots(2, 1, figsize=(8.6, 6.0), constrained_layout=True)
    ordered = deltas.sort_values("alpha")
    alphas = ordered["alpha"].to_numpy(dtype=float)

    # (axis, optional title, ((column, legend label, colour), ...)) per panel.
    panel_specs = (
        (
            axes[0],
            "Robust Minus Non-robust Delta by Contamination",
            (
                ("eval_revenue_mean_delta_pct", "Revenue", "#4C72B0"),
                ("eval_reward_mean_delta_pct", "Reward", "#8172B3"),
            ),
        ),
        (
            axes[1],
            None,
            (
                ("eval_coi_leakage_mean_delta_pct", "COI Leakage", "#55A868"),
                ("eval_volatility_mean_delta_pct", "Volatility", "#DD8452"),
            ),
        ),
    )
    for panel, heading, curves in panel_specs:
        for column, legend_text, colour in curves:
            panel.plot(
                alphas,
                ordered[column].to_numpy(dtype=float),
                marker="o",
                linewidth=1.8,
                markersize=4,
                color=colour,
                label=legend_text,
            )
        # Zero reference line: above means robust is higher than the baseline.
        panel.axhline(0.0, color="#444444", linewidth=1.0, linestyle="--")
        if heading is not None:
            panel.set_title(heading)
        panel.set_ylabel("Delta (%)")
        panel.set_xlabel(r"Contamination $\alpha$")
        panel.set_xticks(alphas)
        panel.legend(loc="lower left")

    out_path = out_dir / "ppo_delta_curves.pdf"
    fig.savefig(out_path, bbox_inches="tight")
    plt.close(fig)
    return out_path
def _plot_ppo_tradeoff_scatter(deltas: pd.DataFrame, out_dir: Path) -> Path:
    """Scatter revenue delta against COI-leakage delta, coloured by alpha.

    Args:
        deltas: Table with ``alpha``, ``eval_coi_leakage_mean_delta_pct`` and
            ``eval_revenue_mean_delta_pct`` columns.
        out_dir: Directory that receives ``ppo_tradeoff_scatter.pdf``.

    Returns:
        Path of the written PDF.
    """
    fig, ax = plt.subplots(figsize=(6.4, 5.2), constrained_layout=True)
    ordered = deltas.sort_values("alpha")
    leakage_delta = ordered["eval_coi_leakage_mean_delta_pct"].to_numpy(dtype=float)
    revenue_delta = ordered["eval_revenue_mean_delta_pct"].to_numpy(dtype=float)
    alpha_values = ordered["alpha"].to_numpy(dtype=float)

    points = ax.scatter(
        leakage_delta,
        revenue_delta,
        c=alpha_values,
        cmap="viridis",
        s=72,
        edgecolor="#222222",
        linewidth=0.5,
    )
    # Label every point with its alpha, nudged slightly up and to the right.
    for point_x, point_y, point_alpha in zip(leakage_delta, revenue_delta, alpha_values):
        ax.annotate(
            rf"$\alpha={point_alpha:.2f}$",
            (point_x, point_y),
            textcoords="offset points",
            xytext=(5, 4),
            fontsize=8,
        )
    # Cross-hairs through the origin mark the "no change" reference.
    ax.axhline(0.0, color="#555555", linewidth=1.0, linestyle="--")
    ax.axvline(0.0, color="#555555", linewidth=1.0, linestyle="--")
    ax.set_xlabel("COI Leakage Delta (%)")
    ax.set_ylabel("Revenue Delta (%)")
    ax.set_title("PPO Robust Tradeoff Frontier")
    colour_bar = fig.colorbar(points, ax=ax)
    colour_bar.set_label(r"Contamination $\alpha$")

    out_path = out_dir / "ppo_tradeoff_scatter.pdf"
    fig.savefig(out_path, bbox_inches="tight")
    plt.close(fig)
    return out_path
def _plot_first_sweep_tier_revenue(tier_mode: pd.DataFrame, out_dir: Path) -> Path:
    """Plot per-tier mean revenue for both modes plus their percentage delta.

    The left panel shows grouped bars (log scale) of mean revenue per tier for
    the non-robust and robust modes; the right panel shows the robust-minus-
    non-robust revenue delta in percent.

    Args:
        tier_mode: Table with ``tier``, ``mode`` and
            ``eval_revenue_mean_mean`` columns, one row per (tier, mode).
        out_dir: Directory that receives ``first_sweep_tier_revenue.pdf``.

    Returns:
        Path of the written PDF.

    Raises:
        ValueError: if no tier has both a robust and a non-robust value,
            including the case where one mode is absent from the table.
    """
    pivot = tier_mode.pivot(
        index="tier", columns="mode", values="eval_revenue_mean_mean"
    )
    required_modes = ["robust", "no_robust"]
    # When a mode is entirely absent the pivot has no such column; report that
    # with the same ValueError instead of a KeyError from dropna(subset=...).
    if any(mode not in pivot.columns for mode in required_modes):
        raise ValueError("First sweep tier summary missing robust/non-robust pairs")
    pivot = pivot.dropna(subset=required_modes, how="any").copy()
    if pivot.empty:
        raise ValueError("First sweep tier summary missing robust/non-robust pairs")

    order = sorted(pivot.index.tolist())
    pivot = pivot.loc[order]
    baseline = pivot["no_robust"]
    # A zero baseline has no meaningful percentage change: mask it to NaN so
    # the bar is skipped instead of being drawn with infinite height.
    delta_pct = 100.0 * (pivot["robust"] - baseline) / baseline.where(baseline != 0)

    fig, axes = plt.subplots(1, 2, figsize=(10.2, 4.3), constrained_layout=True)
    x = np.arange(len(order))
    width = 0.36

    axes[0].bar(
        x - width / 2,
        baseline.to_numpy(dtype=float),
        width=width,
        label="Non-robust",
        color="#4C72B0",
    )
    axes[0].bar(
        x + width / 2,
        pivot["robust"].to_numpy(dtype=float),
        width=width,
        label="Robust",
        color="#C44E52",
    )
    axes[0].set_xticks(x)
    axes[0].set_xticklabels(order, rotation=20)
    axes[0].set_ylabel("Mean Revenue")
    axes[0].set_yscale("log")
    axes[0].yaxis.set_major_formatter(FuncFormatter(_fmt_thousands))
    axes[0].set_title("First Sweep Tier Revenue (log scale)")
    axes[0].legend()

    axes[1].bar(x, delta_pct.to_numpy(dtype=float), color="#55A868", width=0.55)
    axes[1].axhline(0.0, color="#444444", linewidth=1.0, linestyle="--")
    axes[1].set_xticks(x)
    axes[1].set_xticklabels(order, rotation=20)
    axes[1].set_ylabel("Revenue Delta (%)")
    axes[1].set_title("Robust Minus Non-robust by Tier")

    out_path = out_dir / "first_sweep_tier_revenue.pdf"
    fig.savefig(out_path, bbox_inches="tight")
    plt.close(fig)
    return out_path
def build_plots(data_dir: Path, out_dir: Path) -> list[Path]:
    """Render all four figures from the processed CSVs in ``data_dir``.

    All three input tables are loaded before ``out_dir`` is created, so a
    missing input fails before anything is written.

    Returns:
        Paths of the written PDFs, in rendering order.
    """
    alpha_mode = _load_csv(data_dir / "ppo_alpha_mode_summary.csv")
    deltas = _load_csv(data_dir / "ppo_alpha_deltas.csv")
    tier_mode = _load_csv(data_dir / "first_sweep_tier_mode_summary.csv")

    out_dir.mkdir(parents=True, exist_ok=True)

    written: list[Path] = []
    written.append(_plot_ppo_alpha_curves(alpha_mode, out_dir))
    written.append(_plot_ppo_delta_curves(deltas, out_dir))
    written.append(_plot_ppo_tradeoff_scatter(deltas, out_dir))
    written.append(_plot_first_sweep_tier_revenue(tier_mode, out_dir))
    return written
def main() -> None:
    """CLI entry point: optionally refresh the processed CSVs, then plot.

    Prints the path of every written figure, one per line.
    """
    cli = argparse.ArgumentParser(
        description="Create paper-ready plots from result CSVs"
    )
    cli.add_argument("--data-dir", type=Path, default=_output_dir())
    cli.add_argument("--plot-dir", type=Path, default=_plot_dir())
    cli.add_argument(
        "--refresh-data",
        action="store_true",
        help="Regenerate processed CSVs before plotting",
    )
    options = cli.parse_args()

    _configure_style()

    if options.refresh_data:
        # Raw exports are read from <parents[5]>/tpu_orchestration/results.
        results_dir = (
            Path(__file__).resolve().parents[5] / "tpu_orchestration" / "results"
        )
        run_ppo_benchmark(
            input_path=results_dir / "ppo_benchmark.csv",
            output_dir=options.data_dir,
            include_non_finished=False,
        )
        run_first_sweep(
            input_path=results_dir / "first_sweep.csv",
            output_dir=options.data_dir,
            include_non_finished=False,
            top_n=25,
        )

    figures = build_plots(data_dir=options.data_dir, out_dir=options.plot_dir)
    for figure_path in figures:
        print(figure_path)


if __name__ == "__main__":
    main()

View File

@@ -1,51 +0,0 @@
from __future__ import annotations
import argparse
from pathlib import Path
from process_first_sweep import run as run_first_sweep
from process_ppo_benchmark import run as run_ppo_benchmark
def _default_output_dir() -> Path:
return Path(__file__).resolve().parent / "generated" / "legacy"
def main() -> None:
    """CLI entry point: process both result CSV exports and list the outputs.

    Runs the PPO benchmark processor first, then the first-sweep processor,
    and prints every written path, one per line.
    """
    cli = argparse.ArgumentParser(
        description="Process all result CSV exports for paper figures"
    )
    cli.add_argument("--output-dir", type=Path, default=_default_output_dir())
    cli.add_argument("--include-non-finished", action="store_true")
    cli.add_argument("--top-n", type=int, default=25)
    options = cli.parse_args()

    # Raw exports are read from <parents[5]>/tpu_orchestration/results.
    results_dir = Path(__file__).resolve().parents[5] / "tpu_orchestration" / "results"
    keep_unfinished = bool(options.include_non_finished)

    written: list[Path] = [
        *run_ppo_benchmark(
            input_path=results_dir / "ppo_benchmark.csv",
            output_dir=options.output_dir,
            include_non_finished=keep_unfinished,
        ),
        *run_first_sweep(
            input_path=results_dir / "first_sweep.csv",
            output_dir=options.output_dir,
            include_non_finished=keep_unfinished,
            top_n=int(options.top_n),
        ),
    ]
    for written_path in written:
        print(written_path)


if __name__ == "__main__":
    main()

View File

@@ -639,7 +639,7 @@ def run(
)
)
include_dir = Path(__file__).resolve().parent / "includes" / "final"
include_dir = Path(__file__).resolve().parent / "includes"
written.append(
_write_include(
include_dir / "final_focus_revenue_by_alpha.tex",

View File

@@ -1,272 +0,0 @@
from __future__ import annotations
import argparse
import json
from pathlib import Path
from typing import Iterable
import numpy as np
import pandas as pd
def _project_root() -> Path:
return Path(__file__).resolve().parents[5]
def _default_input() -> Path:
    """Return the default location of the first-sweep CSV export."""
    results_dir = _project_root() / "tpu_orchestration" / "results"
    return results_dir / "first_sweep.csv"
def _default_output_dir() -> Path:
return Path(__file__).resolve().parent / "generated" / "legacy"
def _sanitize(key: str) -> str:
return key.replace("/", "_").replace("-", "_")
def _coerce_numeric(frame: pd.DataFrame, columns: Iterable[str]) -> None:
for column in columns:
if column in frame.columns:
frame[column] = pd.to_numeric(frame[column], errors="coerce")
def _extract_alpha(frame: pd.DataFrame) -> pd.Series:
if "study/alpha" in frame.columns:
return pd.to_numeric(frame["study/alpha"], errors="coerce")
if "alpha" in frame.columns:
return pd.to_numeric(frame["alpha"], errors="coerce")
return pd.Series(np.nan, index=frame.index, dtype=float)
def _extract_mode(frame: pd.DataFrame) -> pd.Series:
if "study/mode" in frame.columns:
return frame["study/mode"].astype(str).str.strip().str.lower()
if "study/no_robust" in frame.columns:
no_robust = pd.to_numeric(frame["study/no_robust"], errors="coerce").fillna(0.0)
return pd.Series(
np.where(no_robust > 0.5, "no_robust", "robust"),
index=frame.index,
dtype="object",
)
if "no_robust" in frame.columns:
no_robust = (
frame["no_robust"].astype(str).str.lower().isin({"1", "true", "yes"})
)
return pd.Series(
np.where(no_robust, "no_robust", "robust"),
index=frame.index,
dtype="object",
)
return pd.Series("", index=frame.index, dtype="object")
def _extract_tier(frame: pd.DataFrame) -> pd.Series:
for column in ("tiers", "runtime/backend", "algo", "run.backend", "run.algo"):
if column in frame.columns:
tier = frame[column].astype(str).str.strip().str.lower()
if tier.notna().any():
return tier
return pd.Series("unknown", index=frame.index, dtype="object")
def _prepare_frame(frame: pd.DataFrame, include_non_finished: bool) -> pd.DataFrame:
    """Filter and normalise the raw first-sweep export.

    Steps: optionally keep only rows whose ``State`` is ``finished``; derive
    the ``alpha``, ``mode`` and ``tier`` columns; drop rows without a known
    mode or without an alpha; coerce the metric and hyper-parameter columns to
    numeric; sort by tier, alpha and mode.

    Args:
        frame: Raw export; it is copied, never modified.
        include_non_finished: Keep rows regardless of their ``State``.

    Returns:
        The cleaned table with a fresh 0..n-1 index.
    """
    data = frame.copy()
    if "State" in data.columns and not include_non_finished:
        finished = data["State"].astype(str).str.lower() == "finished"
        data = data[finished].copy()

    data["alpha"] = _extract_alpha(data)
    data["mode"] = _extract_mode(data)
    data["tier"] = _extract_tier(data)

    usable = data["mode"].isin({"robust", "no_robust"}) & data["alpha"].notna()
    data = data[usable]

    numeric_columns = [
        "eval/revenue_mean",
        "eval/reward_mean",
        "eval/coi_level_mean",
        "eval/coi_leakage_mean",
        "eval/margin_mean",
        "eval/volatility_mean",
        "objective/score",
        "train/alpha_adv",
        "lambda_coi",
        "robust_radius",
        "learning_rate",
        "batch_size",
        "n_steps",
        "total_timesteps",
    ]
    _coerce_numeric(data, numeric_columns)

    ordered = data.sort_values(["tier", "alpha", "mode"])
    return ordered.reset_index(drop=True)
def _group_summary(
    frame: pd.DataFrame, by: list[str], metrics: list[str]
) -> pd.DataFrame:
    """Aggregate run count plus mean and std of each metric per group.

    Output columns are the grouping keys, ``runs``, and ``<stem>_mean`` /
    ``<stem>_std`` for every metric, where ``<stem>`` is the sanitised name.
    Rows are sorted by the grouping keys.
    """
    agg_spec: dict[str, tuple[str, str]] = {"runs": ("mode", "size")}
    for metric in metrics:
        stem = _sanitize(metric)
        for statistic in ("mean", "std"):
            agg_spec[f"{stem}_{statistic}"] = (metric, statistic)
    grouped = frame.groupby(by, as_index=False)
    return grouped.agg(**agg_spec).sort_values(by)
def _tier_alpha_deltas(summary: pd.DataFrame, metrics: list[str]) -> pd.DataFrame:
    """Compute robust-minus-baseline deltas for every (tier, alpha) cell.

    Cells that lack either mode are skipped. For each metric the absolute
    delta and the delta relative to the baseline (in percent, NaN when the
    baseline mean is zero) are reported.

    Args:
        summary: Per (tier, alpha, mode) table with ``runs`` and
            ``<stem>_mean`` columns.
        metrics: Raw metric names; sanitised to locate their columns.
    """
    records: list[dict[str, float | str]] = []
    for (tier, alpha), cell in summary.groupby(["tier", "alpha"], sort=True):
        robust_row = cell.loc[cell["mode"] == "robust"]
        baseline_row = cell.loc[cell["mode"] == "no_robust"]
        if robust_row.empty or baseline_row.empty:
            continue

        record: dict[str, float | str] = {
            "tier": str(tier),
            "alpha": float(alpha),
            "runs_robust": float(robust_row["runs"].iloc[0]),
            "runs_no_robust": float(baseline_row["runs"].iloc[0]),
        }
        for metric in metrics:
            stem = _sanitize(metric)
            mean_column = f"{stem}_mean"
            robust_mean = float(robust_row[mean_column].iloc[0])
            baseline_mean = float(baseline_row[mean_column].iloc[0])
            gap = robust_mean - baseline_mean
            record[f"{stem}_delta"] = gap
            if baseline_mean == 0:
                record[f"{stem}_delta_pct"] = np.nan
            else:
                record[f"{stem}_delta_pct"] = 100.0 * gap / baseline_mean
        records.append(record)
    return pd.DataFrame(records)
def _top_runs(frame: pd.DataFrame, n: int) -> pd.DataFrame:
rank_metric = "objective/score"
if rank_metric not in frame.columns or frame[rank_metric].notna().sum() == 0:
rank_metric = "eval/reward_mean"
keep = [
"Name",
"tier",
"alpha",
"mode",
rank_metric,
"eval/revenue_mean",
"eval/reward_mean",
"eval/coi_level_mean",
"eval/coi_leakage_mean",
"lambda_coi",
"robust_radius",
"learning_rate",
"batch_size",
"n_steps",
"total_timesteps",
]
present = [column for column in keep if column in frame.columns]
ranked = frame[present].copy().sort_values(rank_metric, ascending=False)
return ranked.head(max(1, int(n))).reset_index(drop=True)
def _headline_json(
frame: pd.DataFrame, tier_mode: pd.DataFrame
) -> dict[str, float | str]:
out: dict[str, float | str] = {
"runs": int(len(frame)),
"tiers": int(frame["tier"].nunique()),
"alphas": int(frame["alpha"].nunique()),
}
robust_rows = tier_mode[tier_mode["mode"] == "robust"]
no_robust_rows = tier_mode[tier_mode["mode"] == "no_robust"]
if robust_rows.empty or no_robust_rows.empty:
out["status"] = "incomplete_modes"
return out
robust_mean = robust_rows["eval_revenue_mean_mean"].mean()
no_robust_mean = no_robust_rows["eval_revenue_mean_mean"].mean()
out.update(
{
"status": "ok",
"mean_tier_revenue_robust": float(robust_mean),
"mean_tier_revenue_no_robust": float(no_robust_mean),
"mean_tier_revenue_delta": float(robust_mean - no_robust_mean),
"mean_tier_revenue_delta_pct": float(
100.0 * (robust_mean - no_robust_mean) / no_robust_mean
)
if no_robust_mean
else np.nan,
}
)
return out
def run(
    input_path: Path, output_dir: Path, include_non_finished: bool, top_n: int
) -> list[Path]:
    """Process the first-sweep export into summary tables and a headline JSON.

    Args:
        input_path: Raw CSV export.
        output_dir: Destination directory (created when missing).
        include_non_finished: Keep runs whose ``State`` is not ``finished``.
        top_n: Number of rows of the top-configuration table.

    Returns:
        Paths of the four CSV tables followed by the headline JSON.
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    frame = _prepare_frame(
        pd.read_csv(input_path), include_non_finished=include_non_finished
    )

    candidate_metrics = (
        "eval/revenue_mean",
        "eval/reward_mean",
        "eval/coi_level_mean",
        "eval/coi_leakage_mean",
        "eval/margin_mean",
        "eval/volatility_mean",
        "objective/score",
        "train/alpha_adv",
    )
    # Only summarise metrics that the export actually contains.
    metrics = [name for name in candidate_metrics if name in frame.columns]

    tier_mode = _group_summary(frame, ["tier", "mode"], metrics)
    tier_alpha_mode = _group_summary(frame, ["tier", "alpha", "mode"], metrics)
    tables = (
        ("first_sweep_tier_mode_summary.csv", tier_mode),
        ("first_sweep_tier_alpha_mode_summary.csv", tier_alpha_mode),
        ("first_sweep_tier_alpha_deltas.csv", _tier_alpha_deltas(tier_alpha_mode, metrics)),
        ("first_sweep_top_configs.csv", _top_runs(frame, n=top_n)),
    )
    headline = _headline_json(frame, tier_mode)

    written_paths: list[Path] = []
    for filename, table in tables:
        target = output_dir / filename
        table.to_csv(target, index=False)
        written_paths.append(target)

    headline_path = output_dir / "first_sweep_headline_summary.json"
    headline_path.write_text(json.dumps(headline, indent=2))
    written_paths.append(headline_path)
    return written_paths
def main() -> None:
    """CLI entry point: process the first-sweep CSV and print the outputs."""
    cli = argparse.ArgumentParser(
        description="Process first sweep CSV for paper tables"
    )
    cli.add_argument("--input", type=Path, default=_default_input())
    cli.add_argument("--output-dir", type=Path, default=_default_output_dir())
    cli.add_argument("--include-non-finished", action="store_true")
    cli.add_argument("--top-n", type=int, default=25)
    options = cli.parse_args()

    produced = run(
        input_path=options.input,
        output_dir=options.output_dir,
        include_non_finished=bool(options.include_non_finished),
        top_n=int(options.top_n),
    )
    for produced_path in produced:
        print(produced_path)


if __name__ == "__main__":
    main()

View File

@@ -1,277 +0,0 @@
from __future__ import annotations
import argparse
import json
from pathlib import Path
from typing import Iterable
import numpy as np
import pandas as pd
def _project_root() -> Path:
return Path(__file__).resolve().parents[5]
def _default_input() -> Path:
    """Return the default location of the PPO benchmark CSV export."""
    results_dir = _project_root() / "tpu_orchestration" / "results"
    return results_dir / "ppo_benchmark.csv"
def _default_output_dir() -> Path:
return Path(__file__).resolve().parent / "generated" / "legacy"
def _sanitize(key: str) -> str:
return key.replace("/", "_").replace("-", "_")
def _coerce_numeric(frame: pd.DataFrame, columns: Iterable[str]) -> None:
for column in columns:
if column in frame.columns:
frame[column] = pd.to_numeric(frame[column], errors="coerce")
def _extract_alpha(frame: pd.DataFrame) -> pd.Series:
if "study/alpha" in frame.columns:
return pd.to_numeric(frame["study/alpha"], errors="coerce")
if "alpha" in frame.columns:
return pd.to_numeric(frame["alpha"], errors="coerce")
return pd.Series(np.nan, index=frame.index, dtype=float)
def _extract_mode(frame: pd.DataFrame) -> pd.Series:
if "study/mode" in frame.columns:
return frame["study/mode"].astype(str).str.strip().str.lower()
if "study/no_robust" in frame.columns:
no_robust = pd.to_numeric(frame["study/no_robust"], errors="coerce").fillna(0.0)
return pd.Series(
np.where(no_robust > 0.5, "no_robust", "robust"),
index=frame.index,
dtype="object",
)
if "no_robust" in frame.columns:
no_robust = (
frame["no_robust"].astype(str).str.lower().isin({"1", "true", "yes"})
)
return pd.Series(
np.where(no_robust, "no_robust", "robust"),
index=frame.index,
dtype="object",
)
return pd.Series("", index=frame.index, dtype="object")
def _prepare_frame(frame: pd.DataFrame, include_non_finished: bool) -> pd.DataFrame:
    """Filter and normalise the raw PPO benchmark export.

    Steps: optionally keep only rows whose ``State`` is ``finished``; derive
    the ``alpha`` and ``mode`` columns; drop rows without a known mode or
    without an alpha; coerce the metric columns to numeric; sort by alpha and
    mode.

    Args:
        frame: Raw export; it is copied, never modified.
        include_non_finished: Keep rows regardless of their ``State``.

    Returns:
        The cleaned table with a fresh 0..n-1 index.
    """
    data = frame.copy()
    if "State" in data.columns and not include_non_finished:
        finished = data["State"].astype(str).str.lower() == "finished"
        data = data[finished].copy()

    data["alpha"] = _extract_alpha(data)
    data["mode"] = _extract_mode(data)

    usable = data["mode"].isin({"robust", "no_robust"}) & data["alpha"].notna()
    data = data[usable]

    _coerce_numeric(
        data,
        [
            "eval/revenue_mean",
            "eval/reward_mean",
            "eval/coi_level_mean",
            "eval/coi_leakage_mean",
            "eval/volatility_mean",
            "eval/margin_mean",
            "train/alpha_adv",
            "train/coi_penalty",
            "train/ux_penalty",
            "train/agent_prob",
        ],
    )
    ordered = data.sort_values(["alpha", "mode"])
    return ordered.reset_index(drop=True)
def _summary_by_alpha_mode(frame: pd.DataFrame, metrics: list[str]) -> pd.DataFrame:
    """Aggregate run count plus mean and std of each metric per (alpha, mode).

    Output columns are ``alpha``, ``mode``, ``runs`` and ``<stem>_mean`` /
    ``<stem>_std`` for every metric; rows are sorted by alpha then mode and
    re-indexed from zero.
    """
    keys = ["alpha", "mode"]
    agg_spec: dict[str, tuple[str, str]] = {"runs": ("mode", "size")}
    for metric in metrics:
        stem = _sanitize(metric)
        for statistic in ("mean", "std"):
            agg_spec[f"{stem}_{statistic}"] = (metric, statistic)
    summary = frame.groupby(keys, as_index=False).agg(**agg_spec)
    return summary.sort_values(keys).reset_index(drop=True)
def _delta_by_alpha(summary: pd.DataFrame, metrics: list[str]) -> pd.DataFrame:
    """Compute robust-minus-baseline deltas for every alpha.

    Alphas that lack either mode are skipped. For each metric the two mode
    means, their absolute difference and the difference relative to the
    baseline (in percent, NaN when the baseline mean is zero) are reported.

    Args:
        summary: Per (alpha, mode) table with ``runs`` and ``<stem>_mean``
            columns.
        metrics: Raw metric names; sanitised to locate their columns.
    """
    records: list[dict[str, float]] = []
    for alpha, cell in summary.groupby("alpha", sort=True):
        robust_row = cell.loc[cell["mode"] == "robust"]
        baseline_row = cell.loc[cell["mode"] == "no_robust"]
        if robust_row.empty or baseline_row.empty:
            continue

        record: dict[str, float] = {
            "alpha": float(alpha),
            "runs_robust": float(robust_row["runs"].iloc[0]),
            "runs_no_robust": float(baseline_row["runs"].iloc[0]),
        }
        for metric in metrics:
            stem = _sanitize(metric)
            mean_column = f"{stem}_mean"
            robust_mean = float(robust_row[mean_column].iloc[0])
            baseline_mean = float(baseline_row[mean_column].iloc[0])
            gap = robust_mean - baseline_mean
            record[f"{stem}_robust"] = robust_mean
            record[f"{stem}_no_robust"] = baseline_mean
            record[f"{stem}_delta"] = gap
            if baseline_mean == 0:
                record[f"{stem}_delta_pct"] = np.nan
            else:
                record[f"{stem}_delta_pct"] = 100.0 * gap / baseline_mean
        records.append(record)
    return pd.DataFrame(records)
def _pairwise_win_rates(frame: pd.DataFrame) -> pd.DataFrame:
rules = {
"eval/revenue_mean": "higher",
"eval/reward_mean": "higher",
"eval/coi_leakage_mean": "lower",
"eval/volatility_mean": "lower",
}
rows: list[dict[str, float]] = []
for alpha, alpha_group in frame.groupby("alpha", sort=True):
robust = alpha_group[alpha_group["mode"] == "robust"]
no_robust = alpha_group[alpha_group["mode"] == "no_robust"]
if robust.empty or no_robust.empty:
continue
for metric, direction in rules.items():
if metric not in frame.columns:
continue
robust_values = robust[metric].dropna().to_numpy(dtype=float)
no_robust_values = no_robust[metric].dropna().to_numpy(dtype=float)
if robust_values.size == 0 or no_robust_values.size == 0:
continue
if direction == "higher":
wins = (robust_values[:, None] > no_robust_values[None, :]).sum()
else:
wins = (robust_values[:, None] < no_robust_values[None, :]).sum()
ties = (robust_values[:, None] == no_robust_values[None, :]).sum()
total = robust_values.size * no_robust_values.size
win_prob = (wins + 0.5 * ties) / total
rows.append(
{
"alpha": float(alpha),
"metric": metric,
"direction": direction,
"wins": int(wins),
"ties": int(ties),
"total_pairs": int(total),
"win_probability": float(win_prob),
}
)
return pd.DataFrame(rows)
def _overall_mode_summary(frame: pd.DataFrame, metrics: list[str]) -> pd.DataFrame:
    """Aggregate run count plus mean and std of each metric per mode.

    Output columns are ``mode``, ``runs`` and ``<stem>_mean`` / ``<stem>_std``
    for every metric; rows are sorted by mode.
    """
    agg_spec: dict[str, tuple[str, str]] = {"runs": ("mode", "size")}
    for metric in metrics:
        stem = _sanitize(metric)
        for statistic in ("mean", "std"):
            agg_spec[f"{stem}_{statistic}"] = (metric, statistic)
    per_mode = frame.groupby("mode", as_index=False).agg(**agg_spec)
    return per_mode.sort_values("mode")
def _headline_json(overall: pd.DataFrame) -> dict[str, float | str]:
if {"robust", "no_robust"} - set(overall["mode"].tolist()):
return {"status": "incomplete_modes"}
robust = overall[overall["mode"] == "robust"].iloc[0]
no_robust = overall[overall["mode"] == "no_robust"].iloc[0]
revenue_delta = float(
robust["eval_revenue_mean_mean"] - no_robust["eval_revenue_mean_mean"]
)
leakage_delta = float(
robust["eval_coi_leakage_mean_mean"] - no_robust["eval_coi_leakage_mean_mean"]
)
return {
"status": "ok",
"revenue_delta": revenue_delta,
"revenue_delta_pct": float(
100.0 * revenue_delta / no_robust["eval_revenue_mean_mean"]
),
"coi_leakage_delta": leakage_delta,
"coi_leakage_delta_pct": float(
100.0 * leakage_delta / no_robust["eval_coi_leakage_mean_mean"]
),
}
def run(input_path: Path, output_dir: Path, include_non_finished: bool) -> list[Path]:
    """Process the PPO benchmark export into summary tables and a headline JSON.

    Args:
        input_path: Raw CSV export.
        output_dir: Destination directory (created when missing).
        include_non_finished: Keep runs whose ``State`` is not ``finished``.

    Returns:
        Paths of the four CSV tables followed by the headline JSON.
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    frame = _prepare_frame(
        pd.read_csv(input_path), include_non_finished=include_non_finished
    )

    candidate_metrics = (
        "eval/revenue_mean",
        "eval/reward_mean",
        "eval/coi_level_mean",
        "eval/coi_leakage_mean",
        "eval/volatility_mean",
        "eval/margin_mean",
        "train/alpha_adv",
        "train/coi_penalty",
        "train/ux_penalty",
        "train/agent_prob",
    )
    # Only summarise metrics that the export actually contains.
    metrics = [name for name in candidate_metrics if name in frame.columns]

    alpha_mode = _summary_by_alpha_mode(frame, metrics)
    deltas = _delta_by_alpha(alpha_mode, metrics)
    win_rates = _pairwise_win_rates(frame)
    overall = _overall_mode_summary(frame, metrics)
    headline = _headline_json(overall)

    tables = (
        ("ppo_alpha_mode_summary.csv", alpha_mode),
        ("ppo_alpha_deltas.csv", deltas),
        ("ppo_pairwise_win_rates.csv", win_rates),
        ("ppo_overall_mode_summary.csv", overall),
    )
    written_paths: list[Path] = []
    for filename, table in tables:
        target = output_dir / filename
        table.to_csv(target, index=False)
        written_paths.append(target)

    headline_path = output_dir / "ppo_headline_summary.json"
    headline_path.write_text(json.dumps(headline, indent=2))
    written_paths.append(headline_path)
    return written_paths
def main() -> None:
    """CLI entry point: process the PPO benchmark CSV and print the outputs."""
    cli = argparse.ArgumentParser(
        description="Process PPO benchmark CSV for paper tables"
    )
    cli.add_argument("--input", type=Path, default=_default_input())
    cli.add_argument("--output-dir", type=Path, default=_default_output_dir())
    cli.add_argument("--include-non-finished", action="store_true")
    options = cli.parse_args()

    produced = run(
        input_path=options.input,
        output_dir=options.output_dir,
        include_non_finished=bool(options.include_non_finished),
    )
    for produced_path in produced:
        print(produced_path)


if __name__ == "__main__":
    main()

View File

@@ -1,63 +0,0 @@
from pathlib import Path
import numpy as np
import pandas as pd
from scipy import stats
# --- Cohort selection ---------------------------------------------------------
# Finished runs of one sweep bundle, restricted to the baseline study mode with
# 100 products and no UX penalty.
root = Path(__file__).resolve().parents[5]
runs = root.joinpath(
    "engine/studies/results/wandb_sweep_bundles/bundle_20260317_122818/runs_finished.csv"
)
df = pd.read_csv(runs)
in_sweep = df["sweep_id"].astype(str) == "i88nw811"
is_baseline = df["study_mode"].astype(str) == "baseline"
has_100_products = pd.to_numeric(df["n_products"], errors="coerce") == 100.0
has_zero_eta_ux = pd.to_numeric(df["eta_ux"], errors="coerce") == 0.0
df = df[in_sweep & is_baseline & has_100_products & has_zero_eta_ux].copy()

# Keep only rows where both regression variables are numeric.
alpha = pd.to_numeric(df["alpha"], errors="coerce")
revenue = pd.to_numeric(df["eval_revenue_mean"], errors="coerce")
mask = alpha.notna() & revenue.notna()
alpha = alpha[mask].to_numpy(dtype=float)
revenue = revenue[mask].to_numpy(dtype=float)
if len(alpha) < 3 or np.unique(alpha).size < 2:
    raise ValueError("Not enough data for regression")

# --- Ordinary least squares: revenue ~ intercept + slope * alpha --------------
fit = stats.linregress(alpha, revenue)
n = len(alpha)
dof = n - 2
t_stat = fit.slope / fit.stderr
p_val = 2.0 * stats.t.sf(abs(t_stat), df=dof)  # two-sided p-value
r2 = fit.rvalue**2
t_crit = stats.t.ppf(0.975, dof)  # critical value for a 95% interval
slope_ci = (fit.slope - t_crit * fit.stderr, fit.slope + t_crit * fit.stderr)

# --- HC1 sandwich standard errors for the same model -------------------------
x = np.column_stack([np.ones(n), alpha])
beta = np.linalg.lstsq(x, revenue, rcond=None)[0]
resid = revenue - x @ beta
xtx_inv = np.linalg.pinv(x.T @ x)
weighted_rows = x * resid[:, None]
meat = weighted_rows.T @ weighted_rows
# HC1 small-sample scaling: n / (n - number of fitted parameters).
cov_hc1 = (n / (n - x.shape[1])) * (xtx_inv @ meat @ xtx_inv)
se_hc1 = np.sqrt(np.diag(cov_hc1))
t_hc1 = beta[1] / se_hc1[1]
p_hc1 = 2.0 * stats.t.sf(abs(t_hc1), df=dof)
slope_ci_hc1 = (beta[1] - t_crit * se_hc1[1], beta[1] + t_crit * se_hc1[1])

# --- Report -------------------------------------------------------------------
print("Contamination-Revenue Slope")
print(
    "cohort: bundle_20260317_122818, sweep=i88nw811, mode=baseline, n_products=100, eta_ux=0.0"
)
print(f"n={n}")
print(f"model: revenue = {fit.intercept:.2f} {fit.slope:+.2f} * alpha")
print(
    f"OLS: t({dof})={t_stat:.2f}, p={p_val:.3e}, R^2={r2:.3f}, slope_95CI=[{slope_ci[0]:.2f}, {slope_ci[1]:.2f}]"
)
print(
    f"HC1: t={t_hc1:.2f}, p={p_hc1:.3e}, slope_95CI=[{slope_ci_hc1[0]:.2f}, {slope_ci_hc1[1]:.2f}]"
)
print(f"effect: +0.1 alpha -> {0.1 * fit.slope:.2f} revenue units")

Binary file not shown.

Binary file not shown.