CoCalc -- Countries.ipynb

Econometrics/Spring 2018 / Reduction / Multidimensional Scaling / Countries / Countries.ipynb

2108 views

Kernel: Python 3 (Anaconda)

In [1]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
# Apply seaborn's default theme so every matplotlib figure drawn below uses it.
sns.set()

In [2]:

# Rewrite the raw table with decimal commas turned into decimal points
# (presumably so that the numeric columns parse as numbers when loaded).
# NOTE: the replacement is applied to the whole file, so a comma inside a
# text field would be rewritten as well.
#
# `with` guarantees both handles are closed even if reading or writing fails,
# and the explicit encoding keeps the Cyrillic country names intact regardless
# of the platform's default locale.
with open('countries.tsv', 'r', encoding='utf-8') as f_in:
    cleaned_text = f_in.read().replace(',', '.')

with open('countries_clean.tsv', 'w', encoding='utf-8') as f_out:
    f_out.write(cleaned_text)

In [3]:

# Load the cleaned tab-separated table: the first row supplies the column
# names and the first column becomes the row index.
data = pd.read_csv(
    'countries_clean.tsv',
    sep='\t',
    header=0,
    index_col=0,
)
# Preview the first rows as a sanity check.
data.head()

Out[3]:

In [4]:

# Summarize the frame: index range, per-column non-null counts and dtypes.
data.info()

Out[4]:

<class 'pandas.core.frame.DataFrame'>
Index: 52 entries, Австралія to Японія
Data columns (total 12 columns):
   52 non-null int64
   52 non-null int64
   52 non-null int64
   52 non-null float64
   52 non-null float64
   52 non-null int64
   52 non-null float64
   52 non-null int64
   52 non-null int64
  52 non-null float64
  52 non-null int64
  52 non-null int64
dtypes: float64(4), int64(8)
memory usage: 5.3+ KB

In [5]:

from sklearn.preprocessing import StandardScaler

# Standardize every column in a single fit-and-transform step; the fitted
# scaler remains available as `scaler` for later inspection or reuse.
scaler = StandardScaler()
data_std = scaler.fit_transform(data)

In [6]:

# Peek at the first ten standardized rows.
data_std[:10]

Out[6]:

array([[-0.31376135, -0.62898722, -0.27118946, -0.76340453, -0.60000748,
         1.10768523, -0.32165659,  1.00692961,  0.77272626,  0.07345823,
         0.80587611,  0.73639496],
       [-0.36196068, -0.9272304 ,  0.64849653, -0.78256626, -0.85727723,
         1.31118359, -0.22508265, -0.15855198,  0.72360429, -1.06190627,
         0.68226089,  0.62822395],
       [-0.23457674, -0.13191525,  0.03537254, -0.17897178, -0.02115054,
        -0.65912222, -0.31144103,  1.05009559,  0.52711644, -0.00351563,
         0.06418482,  0.1955399 ],
       [ 0.21348027,  1.35930064,  0.64849653,  2.38869996,  1.20088077,
        -1.08057942,  0.51844189, -1.97152335, -2.42020131,  1.054875  ,
        -1.79004338, -2.18422233],
       [-0.35163225, -0.9272304 ,  0.64849653, -0.76659815, -0.72864236,
         1.24755749,  0.02240781,  1.48175544,  0.72360429, -1.06190627,
         0.68226089,  0.62822395],
       [-0.35064859, -0.827816  ,  0.64849653, -0.3897508 , -0.61287097,
        -0.25265134, -0.2714213 ,  0.14360991,  0.72360429, -0.94644547,
        -0.18304561,  0.30371091],
       [ 0.3688985 , -0.03250086,  0.03537254,  1.11125133, -0.08546798,
        -0.79767989, -0.30512212,  0.57526976, -0.16059103, -0.0227591 ,
        -1.29558253, -0.66982818],
       [-0.35212408,  2.55227336,  2.79443052,  2.77193455,  2.64159138,
        -1.06020329, -0.28616541, -2.01468933, -3.25527467,  1.44936606,
        -2.53173467, -2.50873537],
       [-0.11407842, -0.827816  ,  0.64849653, -0.76659815, -0.64502969,
         0.99279016, -0.07448207,  1.17959355,  0.72360429, -1.06190627,
         0.80587611,  0.73639496],
       [-0.04177943,  0.5639855 , -0.27118946,  0.47252702,  0.31973188,
        -1.07689857, -0.09449194, -1.79885941,  0.18326271,  0.45832755,
        -0.55389125, -0.56165717]])

In [7]:

from scipy.spatial.distance import pdist, squareform

# Pairwise distances between the standardized rows (pdist's default metric),
# first in condensed vector form, then expanded to a full square matrix.
condensed_distances = pdist(data_std)
d = squareform(condensed_distances)

In [8]:

from sklearn.manifold import MDS

# MDS starts from a random initial configuration, so without a fixed seed the
# embedding (and therefore the plot) changes on every run. Pinning
# `random_state` makes Restart & Run All reproduce the same figure.
model = MDS(n_components=2, dissimilarity='precomputed', random_state=42)
out = model.fit_transform(d)

# A larger canvas keeps the per-point labels readable.
fig, ax = plt.subplots(figsize=(10, 8))
x = out[:, 0]
y = out[:, 1]
ax.scatter(x, y)

# Label each point with the corresponding row label from `data.index`.
for txt, x_i, y_i in zip(data.index, x, y):
    ax.annotate(txt, (x_i, y_i))

ax.set_title('MDS embedding of the standardized data')
ax.set_xlabel('MDS dimension 1')
ax.set_ylabel('MDS dimension 2')
# Equal axis scaling so distances on screen are comparable in both directions.
ax.axis('equal');

Out[8]:

In [0]:

Product

Resources

Company