mirror of
https://github.com/velocitatem/PHANTOM.git
synced 2026-05-31 16:43:36 +00:00
algorithms and acknowledgements
This commit is contained in:
@@ -54,3 +54,15 @@ Deep dive into how the algorithm works, different kinds and justification for ch
|
|||||||
\subsection{Reinforcement Learning Formulation}
|
\subsection{Reinforcement Learning Formulation}
|
||||||
How we define the state space, the action space, and the reward function, followed by a breakdown of the algorithm benchmarking.
|
How we define the state space, the action space, and the reward function, followed by a breakdown of the algorithm benchmarking.
|
||||||
POSSIBLY: Expand into full subsections: 3.6.1 (State-Action Space), 3.6.2 (Reward Design), 3.6.3 (Benchmarking)
|
POSSIBLY: Expand into full subsections: 3.6.1 (State-Action Space), 3.6.2 (Reward Design), 3.6.3 (Benchmarking)
|
||||||
|
|
||||||
|
|
||||||
|
\begin{algorithm}[t]
|
||||||
|
\DontPrintSemicolon
|
||||||
|
\KwIn{stepsize $\eta$, smoothing $\delta$, rank $d$}
|
||||||
|
\For{$t=1$ \KwTo $T$}{
|
||||||
|
Sample $u_t$ on unit sphere; set $x_t^\prime=x_t+\delta u_t$\;
|
||||||
|
Set $p_t \gets U x_t^\prime$ and observe $q_t, R_t(p_t)$\;
|
||||||
|
$x_{t+1} \gets \Pi_{\mathcal{X}}(x_t-\eta R_t(p_t) u_t)$\;
|
||||||
|
}
|
||||||
|
\caption{Online Pricing Optimization (template)}
|
||||||
|
\end{algorithm}
|
||||||
|
|||||||
3
paper/src/chapters/acknowledgements.tex
Normal file
3
paper/src/chapters/acknowledgements.tex
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
\section{Acknowledgements}
|
||||||
|
|
||||||
|
Eugene Bykovets, PhD---ETH
|
||||||
@@ -7,6 +7,7 @@
|
|||||||
\usepackage{tikz}
|
\usepackage{tikz}
|
||||||
\usepackage{listings}
|
\usepackage{listings}
|
||||||
\usepackage{xcolor}
|
\usepackage{xcolor}
|
||||||
|
\usepackage[ruled,vlined]{algorithm2e}
|
||||||
|
|
||||||
\usetikzlibrary{positioning, shapes, arrows.meta, fit, backgrounds}
|
\usetikzlibrary{positioning, shapes, arrows.meta, fit, backgrounds}
|
||||||
\lstset{
|
\lstset{
|
||||||
|
|||||||
Reference in New Issue
Block a user