{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true, "hide_input": false }, "outputs": [], "source": [ "%matplotlib inline\n", "from preamble import *" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Representing Data and Engineering Features" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Categorical Variables" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "\\begin{align*}\n", "\\end{align*}" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### One-Hot-Encoding (Dummy variables)" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | age | \n", "workclass | \n", "education | \n", "gender | \n", "hours-per-week | \n", "occupation | \n", "income | \n", "
---|---|---|---|---|---|---|---|
0 | \n", "39 | \n", "State-gov | \n", "Bachelors | \n", "Male | \n", "40 | \n", "Adm-clerical | \n", "<=50K | \n", "
1 | \n", "50 | \n", "Self-emp-not-inc | \n", "Bachelors | \n", "Male | \n", "13 | \n", "Exec-managerial | \n", "<=50K | \n", "
2 | \n", "38 | \n", "Private | \n", "HS-grad | \n", "Male | \n", "40 | \n", "Handlers-cleaners | \n", "<=50K | \n", "
3 | \n", "53 | \n", "Private | \n", "11th | \n", "Male | \n", "40 | \n", "Handlers-cleaners | \n", "<=50K | \n", "
4 | \n", "28 | \n", "Private | \n", "Bachelors | \n", "Female | \n", "40 | \n", "Prof-specialty | \n", "<=50K | \n", "
\n", " | age | \n", "hours-per-week | \n", "workclass_ ? | \n", "workclass_ Federal-gov | \n", "... | \n", "occupation_ Tech-support | \n", "occupation_ Transport-moving | \n", "income_ <=50K | \n", "income_ >50K | \n", "
---|---|---|---|---|---|---|---|---|---|
0 | \n", "39 | \n", "40 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "
1 | \n", "50 | \n", "13 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "
2 rows × 46 columns
\n", "\n", " | Integer Feature | \n", "Categorical Feature | \n", "
---|---|---|
0 | \n", "0 | \n", "socks | \n", "
1 | \n", "1 | \n", "fox | \n", "
2 | \n", "2 | \n", "socks | \n", "
3 | \n", "1 | \n", "box | \n", "
\n", " | Integer Feature | \n", "Categorical Feature_box | \n", "Categorical Feature_fox | \n", "Categorical Feature_socks | \n", "
---|---|---|---|---|
0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "
1 | \n", "1 | \n", "0 | \n", "1 | \n", "0 | \n", "
2 | \n", "2 | \n", "0 | \n", "0 | \n", "1 | \n", "
3 | \n", "1 | \n", "1 | \n", "0 | \n", "0 | \n", "
\n", " | Integer Feature_0 | \n", "Integer Feature_1 | \n", "Integer Feature_2 | \n", "Categorical Feature_box | \n", "Categorical Feature_fox | \n", "Categorical Feature_socks | \n", "
---|---|---|---|---|---|---|
0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "
1 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "
2 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "1 | \n", "
3 | \n", "0 | \n", "1 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "
\n", " | age | \n", "workclass | \n", "education | \n", "gender | \n", "hours-per-week | \n", "occupation | \n", "income | \n", "
---|---|---|---|---|---|---|---|
0 | \n", "39 | \n", "State-gov | \n", "Bachelors | \n", "Male | \n", "40 | \n", "Adm-clerical | \n", "<=50K | \n", "
1 | \n", "50 | \n", "Self-emp-not-inc | \n", "Bachelors | \n", "Male | \n", "13 | \n", "Exec-managerial | \n", "<=50K | \n", "
2 | \n", "38 | \n", "Private | \n", "HS-grad | \n", "Male | \n", "40 | \n", "Handlers-cleaners | \n", "<=50K | \n", "
3 | \n", "53 | \n", "Private | \n", "11th | \n", "Male | \n", "40 | \n", "Handlers-cleaners | \n", "<=50K | \n", "
4 | \n", "28 | \n", "Private | \n", "Bachelors | \n", "Female | \n", "40 | \n", "Prof-specialty | \n", "<=50K | \n", "