📚 The CoCalc Library - books, templates and other resources
cocalc-examples / martinthoma-latex-examples / source-code / Pseudocode / Value-Iteration / Value-Iteration.tex
132930 views | License: OTHER
% Standalone document that typesets the Value Iteration pseudocode.
% The preview package (active, tightpage) crops the output page to the
% contents of the preview environment plus the \PreviewBorder margin.
\documentclass{article}
\usepackage[pdftex,active,tightpage]{preview}
\setlength\PreviewBorder{2mm}

\usepackage[utf8]{inputenc} % this is needed for umlauts
\usepackage[ngerman]{babel} % this is needed for umlauts
\usepackage[T1]{fontenc}    % this is needed for correct output of umlauts in pdf
\usepackage{amssymb,amsmath,amsfonts} % nice math rendering
\usepackage{braket}         % needed for \Set
\usepackage{caption}
\usepackage{algorithm}
\usepackage[noend]{algpseudocode} % noend: do not print "end for"/"end while" lines

% Caption format that prints only the caption text (#3), omitting the
% label (#1) and the separator (#2).
\DeclareCaptionFormat{myformat}{#3}
\captionsetup[algorithm]{format=myformat}

\begin{document}
\begin{preview}
    \begin{algorithm}[H]
        \begin{algorithmic}
            % Inputs of the procedure.
            \Require
            \Statex States $\mathcal{X} = \{1, \dots, n_x\}$
            % A maps each state x to the set A(x) of actions iterated over below.
            \Statex Actions $\mathcal{A} = \{1, \dots, n_a\},\qquad A: \mathcal{X} \Rightarrow \mathcal{A}$
            \Statex Cost function $g: \mathcal{X} \times \mathcal{A} \rightarrow \mathbb{R}$
            \Statex Transition probabilities $f_{xy}(a) = \mathbb{P}(y \mid x, a)$
            \Statex Discounting factor $\alpha \in (0, 1)$, typically $\alpha = 0.9$
            \Procedure{ValueIteration}{$\mathcal{X}$, $A$, $g$, $f$, $\alpha$}
                \State Initialize $J, J': \mathcal{X} \rightarrow \mathbb{R}_0^+$ arbitrarily
                \While{$J$ is not converged}
                    % J' holds the estimate of the previous sweep.
                    \State $J' \gets J$
                    \For{$x \in \mathcal{X}$}
                        \For{$a \in A(x)$}
                            \State $Q(x, a) \gets g(x, a) + \alpha \sum_{j=1}^{n_x} f_{xj}(a) \cdot J'(j)$
                        \EndFor
                    \EndFor
                    \For{$x \in \mathcal{X}$}
                        % Q(x, a) is only assigned for a in A(x), so minimise over that set.
                        \State $J(x) \gets \min_{a \in A(x)} Q(x, a)$
                    \EndFor
                \EndWhile
                % \Return does not start a new line by itself in algpseudocode.
                \State \Return $J$
            \EndProcedure
        \end{algorithmic}
        \caption{Value Iteration: Learn function $J: \mathcal{X} \rightarrow \mathbb{R}$}
        \label{alg:value-iteration}
    \end{algorithm}
\end{preview}
\end{document}