Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Download
2108 views
Kernel: Python 3 (Anaconda)
# Numerical, tabular and plotting libraries used by the cells below.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# IPython magic: draw matplotlib figures inline in the notebook output.
%matplotlib inline
import seaborn as sns
# Switch matplotlib to seaborn's default theme for all later plots.
sns.set()
# Rewrite the raw table with decimal commas turned into decimal points and
# store the result as 'countries_clean.tsv' for the loading step.
# NOTE: every comma in the file is replaced, so this assumes commas appear
# only as decimal separators (never inside text fields) - confirm for new data.
#
# The context manager closes both files even if reading or writing raises;
# the encoding is explicit because the row labels are non-ASCII and the
# cleaned file is read back by pandas with its UTF-8 default.
with open('countries.tsv', 'r', encoding='utf-8') as f_in, \
        open('countries_clean.tsv', 'w', encoding='utf-8') as f_out:
    f_out.write(f_in.read().replace(',', '.'))
# Load the cleaned table: tab-separated, first line is the header row,
# first column becomes the row index.
data = pd.read_csv(
    'countries_clean.tsv',
    sep='\t',
    header=0,
    index_col=0,
)
# Preview the first rows (shown as the cell output in the notebook).
data.head()
# Print a summary of the frame: index range, per-column non-null counts
# and dtypes, and memory usage.
data.info()
<class 'pandas.core.frame.DataFrame'> Index: 52 entries, Австралія to Японія Data columns (total 12 columns): 1 52 non-null int64 2 52 non-null int64 3 52 non-null int64 4 52 non-null float64 5 52 non-null float64 6 52 non-null int64 7 52 non-null float64 8 52 non-null int64 9 52 non-null int64 10 52 non-null float64 11 52 non-null int64 12 52 non-null int64 dtypes: float64(4), int64(8) memory usage: 5.3+ KB
from sklearn.preprocessing import StandardScaler

# Standardise every column with StandardScaler's default settings.
# The scaler is fitted and applied on the same data in a single call;
# `scaler` keeps the fitted statistics for later use.
scaler = StandardScaler()
data_std = scaler.fit_transform(data)
# Show the first ten rows of the standardised array.
data_std[:10]
array([[-0.31376135, -0.62898722, -0.27118946, -0.76340453, -0.60000748, 1.10768523, -0.32165659, 1.00692961, 0.77272626, 0.07345823, 0.80587611, 0.73639496], [-0.36196068, -0.9272304 , 0.64849653, -0.78256626, -0.85727723, 1.31118359, -0.22508265, -0.15855198, 0.72360429, -1.06190627, 0.68226089, 0.62822395], [-0.23457674, -0.13191525, 0.03537254, -0.17897178, -0.02115054, -0.65912222, -0.31144103, 1.05009559, 0.52711644, -0.00351563, 0.06418482, 0.1955399 ], [ 0.21348027, 1.35930064, 0.64849653, 2.38869996, 1.20088077, -1.08057942, 0.51844189, -1.97152335, -2.42020131, 1.054875 , -1.79004338, -2.18422233], [-0.35163225, -0.9272304 , 0.64849653, -0.76659815, -0.72864236, 1.24755749, 0.02240781, 1.48175544, 0.72360429, -1.06190627, 0.68226089, 0.62822395], [-0.35064859, -0.827816 , 0.64849653, -0.3897508 , -0.61287097, -0.25265134, -0.2714213 , 0.14360991, 0.72360429, -0.94644547, -0.18304561, 0.30371091], [ 0.3688985 , -0.03250086, 0.03537254, 1.11125133, -0.08546798, -0.79767989, -0.30512212, 0.57526976, -0.16059103, -0.0227591 , -1.29558253, -0.66982818], [-0.35212408, 2.55227336, 2.79443052, 2.77193455, 2.64159138, -1.06020329, -0.28616541, -2.01468933, -3.25527467, 1.44936606, -2.53173467, -2.50873537], [-0.11407842, -0.827816 , 0.64849653, -0.76659815, -0.64502969, 0.99279016, -0.07448207, 1.17959355, 0.72360429, -1.06190627, 0.80587611, 0.73639496], [-0.04177943, 0.5639855 , -0.27118946, 0.47252702, 0.31973188, -1.07689857, -0.09449194, -1.79885941, 0.18326271, 0.45832755, -0.55389125, -0.56165717]])
from scipy.spatial.distance import pdist, squareform

# pdist yields the condensed vector of pairwise distances between rows
# (default metric); squareform expands it into the full square matrix.
condensed = pdist(data_std)
d = squareform(condensed)
from sklearn.manifold import MDS

# Embed the precomputed distance matrix `d` into two dimensions.
model = MDS(n_components=2, dissimilarity='precomputed')
out = model.fit_transform(d)

# Scatter the embedded points and label each one with its row label
# from the data index.
fig, ax = plt.subplots()
x, y = out[:, 0], out[:, 1]
ax.scatter(x, y)
for label, px, py in zip(data.index, x, y):
    ax.annotate(label, (px, py))
# Equal axis scaling; the trailing semicolon suppresses the notebook's
# echo of the return value.
plt.axis('equal');
Image in a Jupyter notebook