📚 The CoCalc Library - books, templates and other resources
License: OTHER
\subsection{How can we use this massive amout of information?}1\begin{frame}{How can we use this massive amout of information?}2\begin{itemize}[<+->]3\item 625.3 million websites4\item Wikipedia is one website and has several millions of pages5\item[$\Rightarrow$] we need to rank websites!6\end{itemize}7\end{frame}89\subsection{Idea}10\begin{frame}{Basics of PageRank}11We all know that:12\begin{itemize}[<+->]13\item humans know what is good for them14\item[\xmark] machines don't know what's good for humans15\item humans create websites16\item humans will only \href{http://en.wikipedia.org/wiki/Hyperlink}{link} to websites they like17\item[$\Rightarrow$] hyperlinks are a quality indicator18\end{itemize}19\end{frame}2021\begin{frame}{How Could We Use That?}22\begin{itemize}[<+->]23\item simply count number of links to a website24\item[\xmark] 10,000 links from only one page25\item count number of websites that link to a website26\item[\xmark] quality of the linking website matters27\item[\xmark] total number of links on the source page matters28\end{itemize}29\end{frame}3031\framedgraphic{A Brilliant Idea}{../images/BrinPage.jpg}3233\begin{frame}{Ideas of PageRank}34\begin{itemize}[<+->]35\item decisions of humans are complicated36\item a lot of webpages get visited37\item[$\Rightarrow$] modellize clicks on links as random behaviour38\item links are important39\begin{itemize}40\item links of page A get less important, if A has many links41\item links of page A get more important, if many link to A42\end{itemize}43\item[$\Rightarrow$] if B has a link from A, the rank of B increases by $\frac{Rank(A)}{Links(A)}$44\end{itemize}4546\pause[\thebeamerpauses]4748\begin{algorithmic}49\If{A links to B}50\State $Rank(B)$ += $\frac{Rank(A)}{Links(A)}$51\EndIf52\end{algorithmic}53\end{frame}5455\begin{frame}{What is PageRank?}56The PageRank algorithm calculates the probability of a randomly57clicking user ending up on a given page.58\end{frame}5960\input{Animation}6162%\begin{frame}{Ants}63% \begin{itemize}[<+->]64% \item Websites = nodes = anthill65% \item Links = edges = paths66% \item You place ants on each node67% \item They walk over the paths68% \item[] (at random, they are ants!)69% \item After some time, some anthills will have more ants than70% others71% \item Those hills are more attractive than others72% \item \# ants is probability that a random user would end on73% a website74% \end{itemize}75%\end{frame}7677\begin{frame}{Mathematics}78Let $x$ be a web page. Then79\begin{itemize}80\item $L(x)$ is the set of websites that link to $x$81\item $C(y)$ is the out-degree of page $y$82\item $\alpha$ is probability of random jump83\item $N$ is the total number of websites84\end{itemize}8586\[\displaystyle PR(x) := \alpha \left ( \frac{1}{N} \right ) + (1-\alpha) \sum_{y\in L(x)} \frac{PR(y)}{C(y)}\]87\end{frame}8889\begin{frame}{Pseudocode}90\begin{algorithmic}91\alertline<1> \Function{PageRank}{Graph $web$, double $q=0.15$, int $iterations$} %q is a damping factor92%\alertline<2> \ForAll{$page \in web$}93%\alertline<3> \State $page.pageRank = \frac{1}{|web|}$ \Comment{intial probability}94%\alertline<2> \EndFor9596\alertline<2> \While{$iterations > 0$}97\alertline<3> \ForAll{$page \in web$} \Comment{calculate pageRank of $page$}98\alertline<4> \State $page.pageRank = q$99\alertline<5> \ForAll{$y \in L(page)$}100\alertline<6> \State $page.pageRank$ += $\frac{y.pageRank}{C(y)}$101\alertline<5> \EndFor102\alertline<3> \EndFor103\alertline<2> \State $iterations$ -= $1$104\alertline<2> \EndWhile105\alertline<1> \EndFunction106\end{algorithmic}107\end{frame}108109110