📚 The CoCalc Library - books, templates and other resources
License: OTHER
% Beamer slide deck: "Convolutional Neural Networks (CNNs) -- Theory and Applications".
%
% Build notes:
%   * minted runs the external Pygments highlighter, so the document has to be
%     compiled with shell escape enabled (e.g. `pdflatex -shell-escape`).
%   * Images and the TikZ snippet are read from the graphics/ directory, and
%     the code listing is read from cnn.py next to this file.
\documentclass{beamer}
\usetheme{metropolis}
\usepackage{hyperref}
\usepackage[utf8]{inputenc} % UTF-8 source encoding (umlauts and other non-ASCII input)
\usepackage[english]{babel} % English hyphenation patterns and localized strings
\usepackage[T1]{fontenc} % T1 font encoding: correct output of umlauts in the PDF
\usepackage{caption}
\usepackage{tikz}
\usetikzlibrary{arrows.meta}
\usetikzlibrary{decorations.pathreplacing}
\usetikzlibrary{positioning}
\usetikzlibrary{decorations.text}
\usetikzlibrary{decorations.pathmorphing}
\usetikzlibrary{shapes.multipart, calc}
\usepackage{minted} % needed for the inclusion of source code

\begin{document}

% ---------------------------------------------------------------------------
% Title page metadata
% ---------------------------------------------------------------------------
\title{Convolutional Neural Networks (CNNs)}
\subtitle{Theory and Applications}
\author{Martin Thoma -- \footnotesize \href{http://tinyurl.com/CNN-Intro}{tinyurl.com/CNN-Intro}}
\date{22. February 2019}
\subject{Machine Learning, AI, Neural Networks, Convolutional Neural Networks}

\frame{\titlepage}

% ---------------------------------------------------------------------------
% Neural network basics (sectioning commands intentionally disabled)
% ---------------------------------------------------------------------------
% \section{Neural Network Basics}
% \subsection{}
\begin{frame}{Artificial Neuron (Perceptron)}
    \[f: \mathbb{R}^n \rightarrow \mathbb{R}\]
    \begin{figure}[ht]
        \centering
        \includegraphics[width=0.8\paperwidth, height=0.7\paperheight, keepaspectratio]{graphics/artificial-neuron.pdf}
    \end{figure}
% Disabled draft material (kept verbatim for reference):
% $$f(x) = ax^2 + bx + c \text{ with } f(0) = 3, f(1) = 2, f(-1) = 6$$
% \begin{align*}
% \onslide<2->{f(0) &= a \cdot 0^2 + b \cdot 0 + c = 3} &\onslide<3->{\Rightarrow c &= 3\\}
% \onslide<4->{f(1) &= a \cdot 1^2 + b \cdot 1 + 3 = 2} &\onslide<5->{\Rightarrow a &= -1-b\\}
% \onslide<6->{f(-1) &= a \cdot {(-1)}^2 - b + 3 = 6\\}
% \onslide<7->{\Leftrightarrow 3&=a - b\\}
% \onslide<8->{\Leftrightarrow 3&= (-1-b) - b\\}
% \onslide<9->{\Leftrightarrow b&= -2\\}
% \onslide<10>{\Rightarrow \quad f(x) &= x^2 -2 x + 3\\}
% \end{align*}
% \only<1>{$$f: \mathbb{R}^n \rightarrow \mathbb{R}^m$$}
% \only<2>{$$f: \mathbb{R}^2 \rightarrow \mathbb{R}$$
% # 2x - 1
% # (x-1)^2 + 1
% Examples:
% \begin{itemize}
% \item $1 \rightarrow 1$: $f(x) = x$
% \item $2 \rightarrow 3$: $f(x) = $
% % \item $3 \rightarrow 3$
% \end{itemize}
% }
\end{frame}

\begin{frame}{Multi-Layer Perceptron (MLP)}
    \[f: \mathbb{R}^n \rightarrow \mathbb{R}^m\]
    \begin{figure}[ht]
        \centering
        \includegraphics[width=0.8\paperwidth, height=0.7\paperheight, keepaspectratio]{graphics/perceptron-notation.pdf}
    \end{figure}
\end{frame}

% Example input -> output mappings, uncovered one item at a time (<+->).
\begin{frame}{}
    \begin{itemize}[<+->]
        \item Predict housing prices: (bed rooms, size, age) $\rightarrow$ Price
        \item Product categorization: (weight, volume, price) $\rightarrow$ \{shoe, handbag, shirt\}
        \item Image classification: List of pixel colors $\rightarrow$ \{cat, dog\}
    \end{itemize}
\end{frame}

% Divider slide showing only the word "Data".
\begin{frame}{}
    \begin{center}
        \Huge Data
    \end{center}
\end{frame}

\begin{frame}{Necessary Data}
    \begin{itemize}
        \item $f(x) = w_0$
        \item $f(x) = w_1 \cdot x + w_0$
        \item $f(x) = w_2^2 \cdot x^2 + w_1^2 \cdot x + w_0$
        \item sin, cos, tan, \dots
    \end{itemize}
\end{frame}

% ---------------------------------------------------------------------------
% CNN building blocks
% ---------------------------------------------------------------------------
\begin{frame}{Convolution}
    \begin{figure}[ht]
        \centering
        \includegraphics[width=0.8\paperwidth]{graphics/convolution-linear.pdf}\\
        \href{https://martin-thoma.com/graphic-filters/}{martin-thoma.com/graphic-filters}
    \end{figure}
\end{frame}

\begin{frame}{Max Pooling}
    \begin{figure}[ht]
        \centering
        \includegraphics[width=0.8\paperwidth]{graphics/max-pooling.pdf}
    \end{figure}
\end{frame}

\begin{frame}{Convolutional Layer}
    \begin{figure}[ht]
        \centering
        % The figure source is pulled in from graphics/convolution-layer(.tex).
        \input{graphics/convolution-layer}
    \end{figure}
\end{frame}


% ---------------------------------------------------------------------------
% Applications
% Each figure disables the caption label so only the source attribution
% text is shown below the image.
% ---------------------------------------------------------------------------
\section{Applications}
\begin{frame}{Symbol recognizer}
    \begin{figure}[ht]
        \centering
        \includegraphics[width=0.8\paperwidth, height=0.7\paperheight, keepaspectratio]{graphics/symbol-recognizer.png}
        \captionsetup{labelformat=empty}
        \caption{\href{http://write-math.com}{write-math.com}}
    \end{figure}
\end{frame}

% Source listing of cnn.py, highlighted as Python with line numbers.
\begin{frame}{}
    \inputminted[linenos,
                 numbersep=7pt,
                 gobble=0,
                 fontsize=\footnotesize, tabsize=4]{python}{cnn.py}
\end{frame}

\begin{frame}{Super Resolution}
    \begin{figure}[ht]
        \centering
        \includegraphics[width=0.8\paperwidth, height=0.7\paperheight, keepaspectratio]{graphics/pixel-recursive-super-resolution.png}
        \captionsetup{labelformat=empty}
        \caption{Dahl, Norouzi, Shlens: Pixel recursive super resolution (2017)}
    \end{figure}
\end{frame}


\begin{frame}{Colorization: The Problem}
    \begin{figure}[ht]
        \centering
        \includegraphics[width=0.8\paperwidth, height=0.7\paperheight, keepaspectratio]{graphics/multimodality-apple.png}
        \captionsetup{labelformat=empty}
        \caption{Cinarel: Automatic Colorization of Webtoons Using Deep Convolutional Neural Networks (2018)}
    \end{figure}
\end{frame}


\begin{frame}{Colorization - Photographs}
    \begin{figure}[ht]
        \centering
        \includegraphics[width=0.8\paperwidth, height=0.7\paperheight, keepaspectratio]{graphics/colorful-image-colorization.png}
        \captionsetup{labelformat=empty}
        \caption{Zhang, Isola, Efros: Colorful Image Colorization (2016)}
    \end{figure}

    Interactive Demo: \href{http://richzhang.github.io/colorization/}{richzhang.github.io/colorization}\\
    Model Lab: \href{https://github.com/MartinThoma/model-lab}{github.com/MartinThoma/model-lab}
\end{frame}


\begin{frame}{Colorization - Comic}
    \begin{figure}[ht]
        \centering
        \includegraphics[width=0.8\paperwidth, height=0.7\paperheight, keepaspectratio]{graphics/comic-colorization.png}
        \captionsetup{labelformat=empty}
        \caption{Ci, Ma, Wang, Li, Luo: User-Guided Deep Anime Line Art Colorization with Conditional Adversarial Networks (2018)}
    \end{figure}
\end{frame}

\begin{frame}{Denoising}
    \begin{figure}[ht]
        \centering
        \includegraphics[width=0.8\paperwidth, height=0.7\paperheight, keepaspectratio]{graphics/denoising.png}
        \captionsetup{labelformat=empty}
        \caption{Zhang, Zuo, Gu, Zhang: Learning Deep CNN Denoiser Prior for Image Restoration (2017)}
    \end{figure}
\end{frame}


\begin{frame}{Image Inpainting (Watermark removal)}
    \begin{figure}[ht]
        \centering
        \includegraphics[width=0.8\paperwidth, height=0.7\paperheight, keepaspectratio]{graphics/leopard-inpainting.png}
        \captionsetup{labelformat=empty}
        \caption{Yang, Lu, Lin, Shechtman, Wang, Li: High-Resolution Image Inpainting using Multi-Scale Neural Patch Synthesis (2017)}
    \end{figure}
\end{frame}


\begin{frame}{CNNs in NLP}
    \begin{figure}[ht]
        \centering
        \includegraphics[width=0.8\paperwidth, height=0.7\paperheight, keepaspectratio]{graphics/tdnns.png}
        \captionsetup{labelformat=empty}
        \caption{Collobert, Weston, Bottou, Karlen, Kavukcuoglu, Kuksa:
        Natural Language Processing (almost) from Scratch (2011)}
    \end{figure}
\end{frame}

\end{document}