Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Download

📚 The CoCalc Library - books, templates and other resources

132930 views
License: OTHER
1
\subsection{How can we use this massive amout of information?}
2
\begin{frame}{How can we use this massive amout of information?}
3
\begin{itemize}[<+->]
4
\item 625.3 million websites
5
\item Wikipedia is one website and has several millions of pages
6
\item[$\Rightarrow$] we need to rank websites!
7
\end{itemize}
8
\end{frame}
9
10
\subsection{Idea}
11
\begin{frame}{Basics of PageRank}
12
We all know that:
13
\begin{itemize}[<+->]
14
\item humans know what is good for them
15
\item[\xmark] machines don't know what's good for humans
16
\item humans create websites
17
\item humans will only \href{http://en.wikipedia.org/wiki/Hyperlink}{link} to websites they like
18
\item[$\Rightarrow$] hyperlinks are a quality indicator
19
\end{itemize}
20
\end{frame}
21
22
\begin{frame}{How Could We Use That?}
23
\begin{itemize}[<+->]
24
\item simply count number of links to a website
25
\item[\xmark] 10,000 links from only one page
26
\item count number of websites that link to a website
27
\item[\xmark] quality of the linking website matters
28
\item[\xmark] total number of links on the source page matters
29
\end{itemize}
30
\end{frame}
31
32
\framedgraphic{A Brilliant Idea}{../images/BrinPage.jpg}
33
34
\begin{frame}{Ideas of PageRank}
35
\begin{itemize}[<+->]
36
\item decisions of humans are complicated
37
\item a lot of webpages get visited
38
\item[$\Rightarrow$] modellize clicks on links as random behaviour
39
\item links are important
40
\begin{itemize}
41
\item links of page A get less important, if A has many links
42
\item links of page A get more important, if many link to A
43
\end{itemize}
44
\item[$\Rightarrow$] if B has a link from A, the rank of B increases by $\frac{Rank(A)}{Links(A)}$
45
\end{itemize}
46
47
\pause[\thebeamerpauses]
48
49
\begin{algorithmic}
50
\If{A links to B}
51
\State $Rank(B)$ += $\frac{Rank(A)}{Links(A)}$
52
\EndIf
53
\end{algorithmic}
54
\end{frame}
55
56
\begin{frame}{What is PageRank?}
57
The PageRank algorithm calculates the probability of a randomly
58
clicking user ending up on a given page.
59
\end{frame}
60
61
\input{Animation}
62
63
%\begin{frame}{Ants}
64
% \begin{itemize}[<+->]
65
% \item Websites = nodes = anthill
66
% \item Links = edges = paths
67
% \item You place ants on each node
68
% \item They walk over the paths
69
% \item[] (at random, they are ants!)
70
% \item After some time, some anthills will have more ants than
71
% others
72
% \item Those hills are more attractive than others
73
% \item \# ants is probability that a random user would end on
74
% a website
75
% \end{itemize}
76
%\end{frame}
77
78
\begin{frame}{Mathematics}
79
Let $x$ be a web page. Then
80
\begin{itemize}
81
\item $L(x)$ is the set of websites that link to $x$
82
\item $C(y)$ is the out-degree of page $y$
83
\item $\alpha$ is probability of random jump
84
\item $N$ is the total number of websites
85
\end{itemize}
86
87
\[\displaystyle PR(x) := \alpha \left ( \frac{1}{N} \right ) + (1-\alpha) \sum_{y\in L(x)} \frac{PR(y)}{C(y)}\]
88
\end{frame}
89
90
\begin{frame}{Pseudocode}
91
\begin{algorithmic}
92
\alertline<1> \Function{PageRank}{Graph $web$, double $q=0.15$, int $iterations$} %q is a damping factor
93
%\alertline<2> \ForAll{$page \in web$}
94
%\alertline<3> \State $page.pageRank = \frac{1}{|web|}$ \Comment{intial probability}
95
%\alertline<2> \EndFor
96
97
\alertline<2> \While{$iterations > 0$}
98
\alertline<3> \ForAll{$page \in web$} \Comment{calculate pageRank of $page$}
99
\alertline<4> \State $page.pageRank = q$
100
\alertline<5> \ForAll{$y \in L(page)$}
101
\alertline<6> \State $page.pageRank$ += $\frac{y.pageRank}{C(y)}$
102
\alertline<5> \EndFor
103
\alertline<3> \EndFor
104
\alertline<2> \State $iterations$ -= $1$
105
\alertline<2> \EndWhile
106
\alertline<1> \EndFunction
107
\end{algorithmic}
108
\end{frame}
109
110