From 26af688d4d03989e4f69e5fe7b49f3673b8d7072 Mon Sep 17 00:00:00 2001 From: Daniel Rosel Date: Fri, 7 Nov 2025 14:33:46 +0100 Subject: [PATCH] algorithms and acknowledgements --- paper/src/chapters/03-methodology.tex | 12 ++++++++++++ paper/src/chapters/acknowledgements.tex | 3 +++ paper/src/preamble.tex | 1 + 3 files changed, 16 insertions(+) create mode 100644 paper/src/chapters/acknowledgements.tex diff --git a/paper/src/chapters/03-methodology.tex b/paper/src/chapters/03-methodology.tex index 2c13011..dd21186 100644 --- a/paper/src/chapters/03-methodology.tex +++ b/paper/src/chapters/03-methodology.tex @@ -54,3 +54,15 @@ Deep dive into how the algorithm works, different kinds and justification for ch \subsection{Reinforcement Learning Formulation} How do we define the state space, action space and reward function breakdown and algorithm benchmarking. POSSIBLY: Expand into full subsections: 3.6.1 (State-Action Space), 3.6.2 (Reward Design), 3.6.3 (Benchmarking) + + +\begin{algorithm}[t] +\DontPrintSemicolon +\KwIn{stepsize $\eta$, smoothing $\delta$, rank $d$} +\For{$t=1$ \KwTo $T$}{ + Sample $u_t$ on unit sphere; set $x_t^\prime=x_t+\delta u_t$\; + Set $p_t \gets U x_t^\prime$ and observe $q_t, R_t(p_t)$\; + $x_{t+1} \gets \Pi_{\mathcal{X}}(x_t-\eta R_t(p_t) u_t)$\; +} +\caption{Online Pricing Optimization (template)} +\end{algorithm} diff --git a/paper/src/chapters/acknowledgements.tex b/paper/src/chapters/acknowledgements.tex new file mode 100644 index 0000000..160bad2 --- /dev/null +++ b/paper/src/chapters/acknowledgements.tex @@ -0,0 +1,3 @@ +\section{Acknowledgements} + +Eugene Bykovets, PhD - ETH diff --git a/paper/src/preamble.tex b/paper/src/preamble.tex index 91cd761..0acd7c7 100644 --- a/paper/src/preamble.tex +++ b/paper/src/preamble.tex @@ -7,6 +7,7 @@ \usepackage{tikz} \usepackage{listings} \usepackage{xcolor} +\usepackage[ruled,vlined]{algorithm2e} \usetikzlibrary{positioning, shapes, arrows.meta, fit, backgrounds} \lstset{