📚 The CoCalc Library - books, templates and other resources
License: OTHER
"""Small synthetic and bundled datasets used by the examples."""
import os

import numpy as np
import pandas as pd
from scipy import signal

# scikit-learn is imported inside the functions that need it, so the
# helpers that only rely on numpy/scipy/pandas stay usable even when a
# scikit-learn symbol (e.g. ``load_boston``) cannot be imported.

DATA_PATH = os.path.join(os.path.dirname(__file__), "..", "data")


def make_forge():
    """Return the small two-class "forge" dataset.

    Starts from 30 points drawn by ``make_blobs`` (two centers, fixed
    seed), relabels two points as class 0 and drops four rows, leaving
    26 samples.

    Returns
    -------
    X : ndarray
        Feature matrix of the retained samples.
    y : ndarray
        Class labels of the retained samples.
    """
    from sklearn.datasets import make_blobs

    # a carefully hand-designed dataset
    X, y = make_blobs(centers=2, random_state=4, n_samples=30)
    y[np.array([7, 27])] = 0
    # ``np.bool`` was only an alias of the builtin and is gone from
    # current NumPy releases; the builtin ``bool`` is the same dtype.
    mask = np.ones(len(X), dtype=bool)
    mask[np.array([0, 1, 5, 26])] = False
    X, y = X[mask], y[mask]
    return X, y


def make_wave(n_samples=100):
    """Return a noisy one-dimensional regression dataset.

    Inputs are drawn uniformly from [-3, 3) with a fixed seed; the
    target is ``(sin(4 * x) + x + gaussian_noise) / 2``.

    Parameters
    ----------
    n_samples : int, default=100
        Number of points to generate.

    Returns
    -------
    X : ndarray of shape (n_samples, 1)
        Inputs as a single-column matrix.
    y : ndarray of shape (n_samples,)
        Noisy targets.
    """
    rnd = np.random.RandomState(42)
    x = rnd.uniform(-3, 3, size=n_samples)
    y_no_noise = np.sin(4 * x) + x
    y = (y_no_noise + rnd.normal(size=len(x))) / 2
    return x.reshape(-1, 1), y


def load_extended_boston():
    """Return the Boston housing data with degree-2 polynomial features.

    The raw features are min-max scaled and then expanded with
    ``PolynomialFeatures(degree=2, include_bias=False)``.

    Returns
    -------
    X : ndarray
        Expanded feature matrix.
    y : ndarray
        The dataset's ``target`` attribute.

    Raises
    ------
    ImportError
        If the installed scikit-learn does not provide ``load_boston``
        (it was removed in scikit-learn 1.2).
    """
    from sklearn.datasets import load_boston
    from sklearn.preprocessing import MinMaxScaler, PolynomialFeatures

    boston = load_boston()
    X = MinMaxScaler().fit_transform(boston.data)
    X = PolynomialFeatures(degree=2, include_bias=False).fit_transform(X)
    return X, boston.target


def load_citibike():
    """Return the number of rows of ``citibike.csv`` per 3-hour bin.

    Reads ``citibike.csv`` from ``DATA_PATH``, indexes the rows by their
    parsed ``starttime`` and counts them in 3-hour bins.

    Returns
    -------
    pandas.Series
        Series named ``one`` holding the per-bin row counts, indexed by
        the start of each 3-hour bin.
    """
    data_mine = pd.read_csv(os.path.join(DATA_PATH, "citibike.csv"))
    # A constant column of ones turns the per-bin sum into a row count.
    data_mine['one'] = 1
    data_mine['starttime'] = pd.to_datetime(data_mine.starttime)
    data_starttime = data_mine.set_index("starttime")
    # Only the counter column is needed, so select it before resampling
    # instead of summing every (possibly non-numeric) column of the CSV.
    return data_starttime["one"].resample("3h").sum().fillna(0)


def make_signals():
    """Return three noisy source signals as columns of one array.

    The signals are a sine wave, a square wave and a sawtooth wave,
    sampled at 2000 points on [0, 8] with seeded Gaussian noise added.
    Each column is divided by its standard deviation and the whole
    array is then shifted so that its smallest entry is 0.

    Returns
    -------
    S : ndarray of shape (2000, 3)
        One signal per column.
    """
    # fix a random state seed
    rng = np.random.RandomState(42)
    n_samples = 2000
    time = np.linspace(0, 8, n_samples)
    # create three signals
    s1 = np.sin(2 * time)  # Signal 1: sinusoidal signal
    s2 = np.sign(np.sin(3 * time))  # Signal 2: square signal
    s3 = signal.sawtooth(2 * np.pi * time)  # Signal 3: saw tooth signal

    # concatenate the signals, add noise
    S = np.c_[s1, s2, s3]
    S += 0.2 * rng.normal(size=S.shape)

    S /= S.std(axis=0)  # Standardize data
    S -= S.min()
    return S