📚 The CoCalc Library - books, templates and other resources
cocalc-examples / martinthoma-latex-examples / source-code / Pseudocode / Policy-Iteration / Policy-Iteration.tex
132939 viewsLicense: OTHER
% Policy Iteration pseudocode, rendered as a tightly cropped standalone
% image: the `preview` package (active, tightpage) crops the output to the
% contents of the preview environment plus \PreviewBorder.
\documentclass{article}
\usepackage[pdftex,active,tightpage]{preview}
\setlength\PreviewBorder{2mm}

\usepackage[utf8]{inputenc} % this is needed for umlauts
\usepackage[ngerman]{babel} % this is needed for umlauts
\usepackage[T1]{fontenc} % this is needed for correct output of umlauts in pdf
\usepackage{amssymb,amsmath,amsfonts} % nice math rendering
\usepackage{braket} % needed for \Set
\usepackage{caption}
\usepackage{algorithm}
\usepackage[noend]{algpseudocode} % noend: do not print "end while"/"end for" lines

% Caption format that prints only the caption text (#3), dropping the
% "Algorithm N" label (#1) and the separator (#2).
\DeclareCaptionFormat{myformat}{#3}
\captionsetup[algorithm]{format=myformat}

\begin{document}
\begin{preview}
\begin{algorithm}[H]
    \begin{algorithmic}
        % Inputs of the procedure.
        \Require
        \Statex States $\mathcal{X} = \{1, \dots, n_x\}$
        \Statex Actions $\mathcal{A} = \{1, \dots, n_a\},\qquad A: \mathcal{X} \Rightarrow \mathcal{A}$
        \Statex Cost function $g: \mathcal{X} \times \mathcal{A} \rightarrow \mathbb{R}$
        \Statex Transition probabilities $f$, $F$
        \Statex $\alpha \in (0, 1)$
        \Procedure{PolicyIteration}{$\mathcal{X}$, $A$, $g$, $f$, $F$, $\alpha$}
            \State Initialize $\pi$ arbitrarily
            \While{$\pi$ is not converged}
                % Policy evaluation: cost-to-go J of the current policy.
                \State $J \gets$ solve system of linear equations $(I - \alpha \cdot F(\pi)) \cdot J = g(\pi)$

                % Policy improvement: Q-values for every state/action pair,
                % then pick the cost-minimizing action per state.
                \For{$x \in \mathcal{X}$}
                    \For{$a \in A(x)$}
                        \State $Q(x, a) \gets g(x, a) + \alpha \sum_{j=1}^{n_x} f_{xj}(a) \cdot J(j)$
                    \EndFor
                \EndFor
                \For{$x \in \mathcal{X}$}
                    \State $\pi(x) \gets \arg \min_a \{Q(x, a)\}$
                \EndFor
            \EndWhile
            % \Return only typesets the keyword; \State is required to put
            % it on its own line (otherwise it is appended to the previous one).
            \State \Return $\pi$
        \EndProcedure
    \end{algorithmic}
    \caption{Policy Iteration: Learning a policy $\pi: \mathcal{X} \rightarrow \mathcal{A}$}
    \label{alg:policy-iteration}
\end{algorithm}
\end{preview}
\end{document}