CoCalc -- feather.tex

📚 The CoCalc Library - books, templates and other resources
cocalc-examples / martinthoma-latex-examples / presentations / software-engineering-basics / feather.tex
²⁰²⁰²⁹ views
License: OTHER
1
\documentclass[10pt]{beamer}
2
\usetheme[
3
%%% option passed to the outer theme
4
%    progressstyle=fixedCircCnt,   % fixedCircCnt, movingCircCnt (moving is deault)
5
  ]{Feather}
6
  
7
% If you want to change the colors of the various elements in the theme, edit and uncomment the following lines
8

9
% Change the bar colors:
10
%\setbeamercolor{Feather}{fg=red!20,bg=red}
11

12
% Change the color of the structural elements:
13
%\setbeamercolor{structure}{fg=red}
14

15
% Change the frame title text color:
16
%\setbeamercolor{frametitle}{fg=blue}
17

18
% Change the normal text color background:
19
%\setbeamercolor{normal text}{fg=black,bg=gray!10}
20

21
%-------------------------------------------------------
22
% INCLUDE PACKAGES
23
%-------------------------------------------------------
24

25
\usepackage[utf8]{inputenc}
26
\usepackage[english]{babel}
27
\usepackage[T1]{fontenc}
28
\usepackage{helvet}
29
\usepackage{multirow}
30

31
%-------------------------------------------------------
32
% DEFFINING AND REDEFINING COMMANDS
33
%-------------------------------------------------------
34

35
% colored hyperlinks
36
\newcommand{\chref}[2]{
37
  \href{#1}{{\usebeamercolor[bg]{Feather}#2}}
38
}
39

40
%-------------------------------------------------------
41
% INFORMATION IN THE TITLE PAGE
42
%-------------------------------------------------------
43

44
\title[] % [] is optional - is placed on the bottom of the sidebar on every slide
45
{ % is placed on the title page
46
      \textbf{Complete The Look Recommendation with Street Fashion Images}
47
}
48

49
\subtitle[Complete The Look Recommendation]
50
{
51
%      \textbf{v. 1.0.0}
52
}
53

54
\author[Bhaskar Bagchi]
55
{      Bhaskar Bagchi \\
56
      {\ttfamily bagchi.bhaskar@cse.iitkgp.ernet.in}
57
}
58

59
\institute[]
60
{
61
      Department of Computer Science and Engineering\\
62
      Indian Institute of Technology, Kharagpur\\
63
  
64
  %there must be an empty line above this line - otherwise some unwanted space is added between the university and the country (I do not know why;( )
65
}
66

67
\date{\today}
68

69
%-------------------------------------------------------
70
% THE BODY OF THE PRESENTATION
71
%-------------------------------------------------------
72

73
\begin{document}
74

75
%-------------------------------------------------------
76
% THE TITLEPAGE
77
%-------------------------------------------------------
78

79
{\1% % this is the name of the PDF file for the background
80
\begin{frame}[plain,noframenumbering] % the plain option removes the header from the title page, noframenumbering removes the numbering of this frame only
81
  \titlepage % call the title page information from above
82
\end{frame}}
83

84

85
\begin{frame}{Content}{}
86
\tableofcontents
87
\end{frame}
88

89
%-------------------------------------------------------
90
\section{Introduction}
91
%-------------------------------------------------------
92
\subsection{Problem Definition}
93
\begin{frame}{Introduction}{Problem Definition}
94
%-------------------------------------------------------
95

96
  \begin{itemize}
97
    \item<1-> Given an item(clothing) in the shopping cart the problem statement is to suggest items complementary to it which may contain garments or accessories which makes a complete set as per current fashion.
98
%    \item<2-> The rest of the theme is provided under the GNU General Public License v. 3 (GPLv3) \chref{http://www.gnu.org/licenses/}{http://www.gnu.org/licenses/}. This means that you can redistribute it and/or modify it under the same license. 
99
  \end{itemize}
100
\end{frame}
101

102

103
\begin{frame}{Introduction}{Problem Definition}
104
\begin{figure}[t]
105
    \centering
106
    \includegraphics[height=\dimexpr11\textheight/16\relax]{reco_exst_1}
107
    \caption{Existing Recommendation Systems}
108
  \end{figure}
109
\end{frame}
110

111

112

113
\begin{frame}{Introduction}{Problem Definition}
114
\begin{figure}[t]
115
    \centering
116
    \includegraphics[height=\dimexpr11\textheight/16\relax]{Untitled}
117
    \caption{Visualization of the problem statement}
118
  \end{figure}
119
\end{frame}
120

121

122
%-------------------------------------------------------
123
\section{Mathematical Formulation}
124
%-------------------------------------------------------
125
\subsection{Feature Representation}
126
% \begin{frame}{Mathematical Formulation}{Feature Representation}
127
% %-------------------------------------------------------
128

129
% \begin{itemize}
130
%   \pause
131
%   \item<1-> Say we have n training images.
132
%   \pause
133
%   \item<2-> Each image `\emph{i}' has `\emph{p}' parts.
134
%   \pause
135
%   \item<3-> Each part is represented by a \emph{k-dimensional} feature vector, \emph{h}.
136
% \end{itemize}
137

138
% \pause
139

140
% \begin{block}{}
141
% Thus each can be represented as follows:
142
% \begin{itemize}
143
% \item {\tt Image H$_{i}${$^{T}$} = [h$_{i1}${$^{T}$}, h$_{i2}${$^{T}$},... , h$_{ip}${$^{T}$}].}
144
% \item {\tt Dataset H = [H$_{1}$, H$_{2}$,... , H$_{n}$]}
145
% \end{itemize}
146
% \end{block}
147

148
% \pause
149
% \begin{block}{}
150
% The recommendation task will be to generate a predictive model \emph{M}(\emph{H},\emph{q}), where \emph{H} is the training set and given some query \emph{q} with some part m missing, we can predict the m$^{th}$ part.
151
% \end{block}
152

153
% \end{frame}
154

155

156
% %-------------------------------------------------------
157
% \subsection{Simplified Formulation}
158
% \begin{frame}{Mathematical Formulation}{Simplified Formulation}
159
% %-------------------------------------------------------
160
%   The previous formulation is very rigid, where each image has to have \emph{p} parts. So we have following relaxation.
161
%   \pause
162
%   \begin{block}{Relaxations}
163
%   \begin{itemize}    
164
%     \item Instead of asserting each image to have \emph{p} parts we keep it flexible.
165
%     \item We also don't define any particular order of parts of clothings.
166
%     \item Instead of using numeric features we use textual tags corresponding to each part of the garment/accessory as feature.
167
%     \item Thus each image is defined by a number of features, which are 2--tuples $\{ Cloth-type \: {:} \: Cloth-description/color \}$.
168
%   \end{itemize}
169
%   \end{block}
170
% \end{frame}
171

172
\begin{frame}{Mathematical Formulation}{Simplified Formulation}
173
\begin{block}{}
174
Given an image $i$ containing `$k$' part--features, we describe the image $P_i$ as $P_i^{T} := [p_{i1}, p_{i2}, ..., p_{ik}]$ where each $p_{ij}$ are textual part--features, which are 2--tuples.
175
\end{block}
176
\pause
177
\begin{block}{}
178
We learn a model from our dataset of fashion images, say \textbf{P}, where \textbf{P} := $[P_1, P_2, ... P_n]^{T}$.
179
\end{block}
180
\pause
181
\begin{block}{}
182
The task of our recommendation system is, given one or more apparel, and corresponding part features $p$'s as input query, recommend garments which can be worn with it/them as a set.
183
\end{block}
184
\end{frame}     
185

186
%-------------------------------------------------------
187
% \section{Approach}
188
% \subsection{Basic Approach}
189
% \begin{frame}{Approach}{Basic Approach}
190
% %-------------------------------------------------------
191

192
% \begin{enumerate}
193
% 	\pause
194
% 	\item Make a bipartite graph where one of the partite sets is \emph{images} and the other is  \emph{features}. Edge exists if feature is present in the image.
195
%     \pause
196
%     \item Take the projection of this graph on the feature set, thus we get the co--occurrence graph.
197
%     \pause
198
%     \item For given query part--feature, calculate its SimRank with its neighbours and return the k--nearest--neighbours.
199
%     \pause
200
%     \item In case of more than one feature--parts in the query, for each part find the k--nearest--neighbours.
201
%     \pause
202
%     \item For each k--nearest--neighbour list combine them into a single list using \emph{Broda's Rank Aggregation}.
203
% \end{enumerate}
204
% \end{frame}
205

206
\begin{frame}{Approach}{Flow Diagram}
207
\begin{figure}[t]
208
    \centering
209
    \includegraphics[height=\dimexpr11\textheight/16\relax]{flowchart}
210
    \caption{Flow Diagram of Proposed Approach}
211
  \end{figure}
212
\end{frame}
213

214
\subsection{Fashion Websites \& Ground Truth}
215
\begin{frame}{Fashion Websites \& Ground Truth}{Scraping Fashion Websites}
216
\begin{itemize}
217
\item Scraped more than 500 images of female fashionstas from \url{www.chictopia.com}. These images covered an appreciable range of street fashion from corporate dressing sense to the most casual of the dresses.
218
\pause
219
\item Created a vocabulary of part features. Manually normalize the tags associated with each image.
220
\pause
221
\item Ended up with a codebook of total of \textit{48} unique categories including garments like tops, jeans, etc. and accessories like watches, bracelets, etc. and \textit{632} unique items i.e. category-description pair.
222
\end{itemize}
223
	%\begin{figure}[t]
224
    %\centering
225
    %\includegraphics[scale=0.3]{simrank}
226
    %\caption{Directed graph representing websites and hyperlinks}
227
  	%\end{figure}
228
\end{frame}
229

230

231
\subsection{Bipartite Network and Co--occurrence Graph}
232
\begin{frame}{Bipartite Network and Co--occurrence Graph}{}
233
\begin{itemize}
234
\item A bipartite graph is formed with dataset images $P$'s as the first partite sets and part--features $p_i$'s as the second partite set. There exists an edge between ever part feature and the image in which it occurred.
235
\pause
236
\item The bipartite graph is then projected to the set of part features.
237
\pause
238
\item The projected graph so obtained is a weighted co--occurrence graph of the part features. Construction of this graph gives us the relation between different garments and accessories which can be used together and are complementary to each other.
239
\pause
240
\item This step helps us learn a correlation and inter-dependence between various part features from the dataset.
241
\end{itemize}
242

243

244
%Mathematically SimRank score is defined recursively. Let us denote similarity between objects a and b by S(a, b) $\in$ [0, 1].
245
%\begin{block}{Formula}
246
%\begin{equation}
247
%S(a, b) =	\left\{ \begin{array}{ll}
248
%			1 & \mbox{if $a=b$}; \\
249
%			\frac{C}{\left | I(a) \right | \left | I(b) \right |}\sum_{i = 0}^{\left | I(a) \right |}\sum_{j = 0}^{\left | I(b) \right |}S\left(I_i\left(a\right), I_j\left(b\right ) \right) & \mbox{$otherwise$}.\end{array} \right.
250
%\end{equation}
251

252
%where, \emph{C} is a constant between 0 and 1 which controls the amount of score propagation in recursive call, \emph{I(a)} is the set of incoming edges to a and \emph{I(b)} is the set of incoming edges to b.
253

254
%\end{block}
255
\end{frame}
256

257
\subsection{Similarity Measure \& Nearest Neighbor Consensus}
258
\begin{frame}{Similarity Measure \& Nearest Neighbor}{Similarity Measure}
259
\begin{itemize}
260
\item The co-occurrence graph falls in a domain where nodes represents the objects and edges represents the relations between them. We use \textit{Simrank} to measure the similarity based on \textit{structural context} of the graph.
261
\pause
262
\item Convert the co--occurrence graph into a directed graph where each edge between part features $p_a$ and $p_b$ in the original graph is replaced by two directed edges $p_a \rightarrow p_b$ and $p_b \rightarrow p_a$ both with weights equal to the weight of original edge.
263
\pause
264
\item Compute \textit{Simrank} between each pair of nodes.
265
\end{itemize}
266

267
%\pause
268
%\begin{block}{Types of rank aggregation problems}
269
%\begin{enumerate}
270
%\item If the individual lists contain all the elements in \emph{U} then they are called complete lists. They are a total ordering of \emph{U}.
271
%\item There are situations where full lists are not convenient, and even not possible. In such cases the lists contain an ordered list of a subset of elements of \emph{U}. $ \left | L_i \right | < \left | U \right |$. Such lists are called partial lists.
272
%\item A special case of the partial list problem is the top--K list. In this ranking we take the top K elements from each ordered list, and so we know that all the elements that are not present in the list are ranked below those which are present in the list.
273
%\end{enumerate}
274
%\end{block}
275
\end{frame}
276

277
\begin{frame}{Similarity Measure \& Nearest Neighbor}{Nearest Neighbor Consensus}
278
\begin{itemize}
279
\item Given a part--feature $p$ as query we locate the node corresponding to that part feature in the co--occurrence graph.
280
\pause
281
\item We find out other nodes which are close to it, i.e. nodes which have highest simrank value with this node.
282
\pause
283
\item The rationale behind this step is that since the graph had edges between part features that were used together by fashionistas and as the simrank values decrease with increase in node distances, the $k$--nearest--neighbors will be those part features which were frequently used with the selected item and are contemporary to it.
284
\pause
285
\item We get a list of k part features $p_1, p_2, ... p_k$ which are structurally close to the input feature and thus they can be recommended for the given query part feature.
286
\end{itemize}
287
\end{frame}
288

289
\subsection{Aggregating Ranked Item Recommendations}
290
\begin{frame}{Aggregating Ranked Item Recommendations}{Rank Aggregation}
291
\begin{itemize}
292
\item Say we have $j$ part features $p_1, p_2, ... p_j$ as input query, we find out individual $k$--nearest--neighbors for each part feature.
293
\pause
294
\item we have $j$ ranked lists, each with $k$ members, which are recommendation related to each input feature.
295
\pause
296
\item Assigns a score corresponding to position in which a part feature appears within each ranked list. In our case, for each list $i$, ${p_a}^i$ is assigned a weight ${B_{p_a}}^i$ = $k$ * fraction of part features in the list appearing below $p_a$.
297
\pause
298
\item The \textit{Broda} score of each element $B_{p_a}$ is the the sum of \textit{Broda} scores for that part feature in all the lists.
299
\pause
300
\item We can recomment the top $k$ elements from this ranked list to the user.
301
\end{itemize}
302
\end{frame}
303

304

305
%-------------------------------------------------------
306
\section{Experimental Results}
307
\begin{frame}{Experimental Results}{Evaluation Methodology}
308
%-------------------------------------------------------
309
\begin{itemize}
310
\item We took 20 images as test set from our dataset. Since each image is user tagged, we have labelled ground truth for computing the required metrics.
311
\pause
312
\item For each image we took, we used all its part features individually as one feature input. We also used various permutations of 2 part features and 3 part features as input to the recommender and compared the recommended part features with the ground truth.
313
\pause
314
\item Then we calculated \textit{precision, recall} and \textit{f1} values for 158 sets of recommendations.
315
\end{itemize}
316
\pause
317
\begin{block}{Formula}
318
$precision = \frac{no \ of \ matched \ recommendations}{no \ of \ recommendations}$
319
\newline
320
$recall = \frac{no \ of \ matched \ recommendation}{no \ of \ items \ in \ actual \ image}$
321
\end{block}
322
\end{frame}
323

324
\begin{frame}{Experimental Results}{Results}
325
Out of the 158 recommendation sets that we tested, 53 were 1 part feature input, 54 were 2 part feature input and 51 as 3 part feature input. For each generated recommendations we calculated the precision and recall.
326

327
\begin{table}
328
\centering
329
\caption{Precision}
330
\begin{tabular}{|c|c|c|c|}
331
\hline
332
No. of inputs & Max Precision & Avg Precision\\
333
\hline\hline
334
1 & 1 & 0.31\\
335
2 & 0.75 & 0.31\\
336
3 & 0.6 & 0.28\\
337
\hline\end{tabular}
338
\label{table:precision}
339
\end{table}
340

341
\begin{table}
342
\centering
343
\caption{Recall}
344
\begin{tabular}{|c|c|c|c|}
345
\hline
346
No. of inputs & Max Recall & Avg Recall\\
347
\hline\hline
348
1 & 0.8 & 0.23\\
349
2 & 1 & 0.44\\
350
3 & 1 & 0.48\\
351
\hline\end{tabular}
352
\label{table:recall}
353
\end{table}
354
\end{frame}
355

356
\begin{frame}{Experimental Results}{Results}
357
\begin{table}
358
\centering
359
\caption{f1 score}
360
\begin{tabular}{|c|c|c|}
361
\hline
362
No. of inputs & Max f1 & Min f1\\
363
\hline\hline
364
1 & 0.89 & 0.13\\
365
2 & 0.71 & 0.1\\
366
3 & 0.67 & 0.1\\
367
\hline\end{tabular}
368
\label{table:f1}
369
\end{table}
370
\begin{figure}[htb]
371
\centering
372
\includegraphics[scale=0.4]{g11}
373
\caption{Precision-Recall for 1 item input}
374
\label{fig:g1}
375
\end{figure}
376
\end{frame}
377

378
\begin{frame}{Experimental Results}{Precision Recall Graphs}
379
\begin{figure}[htb]
380
\centering
381
\includegraphics[scale=0.4]{g22}
382
%\caption{Precision-Recall for 2 item input}
383
\label{fig:g2}
384
\end{figure}
385
\begin{figure}[htb]
386
\centering
387
\includegraphics[scale=0.4]{g33}
388
%\caption{Precision-Recall for 3 item input}
389
\label{fig:g3}
390
\end{figure}
391
\end{frame}
392

393
\begin{frame}{Experimental Results}{Manual Evaluation Results}
394
\begin{table}
395
\centering
396
\caption{User rating for recommendation}
397
\begin{tabular}{|c|c|c|}
398
\hline
399
Rate(out of 10) & Frequency & Cumulative Freq.\\
400
\hline\hline
401
10 & 1 & 1\\
402
9 & 2 & 3\\
403
8 & 9 & 12\\
404
7 & 9 & 21\\
405
6 & 5 & 26\\
406
5 & 11 & 37\\
407
4 & 11 & 48\\
408
3 & 6 & 54\\
409
2 & 4 & 58\\
410
1 & 2 & 60\\
411
\hline\end{tabular}
412
\label{table:userRating}
413
\end{table}
414

415
\end{frame}
416

417
%-------------------------------------------------------
418
\section{Future Work}
419
\begin{frame}{Future Work}
420
%-------------------------------------------------------
421

422
\begin{itemize}
423

424
\item Features for representation of parts are to be improved by incorporating visual features. Inclusion of visual features will also include the analysis of features like color, texture, etc. which is expected to improve the quality of evaluation.
425

426
\item A feedback system can be added to the system as to increase edge weights to the features which are shopped together by users. This will be a self learning system and incorporate the changes in trending fashion all by itself.
427

428
\end{itemize}
429

430
\end{frame}
431

432
%-------------------------------------------------------
433
\section{References}
434
\begin{frame}{References}
435
%-------------------------------------------------------
436

437
\bibliographystyle{abbrv}
438
%\bibliography{sigproc}
439

440
\begin{thebibliography}{1}
441

442
\bibitem{rankAggregation}
443
C.~Dwork, R.~Kumar, M.~Naor, and D.~Sivakumar.
444
\newblock Rank aggregation methods for the web.
445
\newblock In {\em Proceedings of the 10th International Conference on World
446
  Wide Web}, WWW '01, pages 613--622, New York, NY, USA, 2001. ACM.
447

448
\bibitem{simrank}
449
G.~Jeh and J.~Widom.
450
\newblock Simrank: A measure of structural-context similarity.
451
\newblock In {\em Proceedings of the Eighth ACM SIGKDD International Conference
452
  on Knowledge Discovery and Data Mining}, KDD '02, pages 538--543, New York,
453
  NY, USA, 2002. ACM.
454

455
\bibitem{bundleReco}
456
T.~Zhu, P.~Harrington, J.~Li, and L.~Tang.
457
\newblock Bundle recommendation in ecommerce.
458
\newblock In {\em Proceedings of the 37th International ACM SIGIR Conference on
459
  Research \&\#38; Development in Information Retrieval}, SIGIR '14, pages
460
  657--666, New York, NY, USA, 2014. ACM.
461

462
\end{thebibliography}
463
\end{frame}
464

465

466
{\1
467
\begin{frame}[plain,noframenumbering]
468
  \finalpage{Thank you! Questions?}
469
\end{frame}}
470

471
\end{document}
472
Product

Resources

Company