Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Download

📚 The CoCalc Library - books, templates and other resources

132928 views
License: OTHER
1
import numpy as np
2
import pandas as pd
3
import os
4
from scipy import signal
5
from sklearn.datasets import load_boston
6
from sklearn.preprocessing import MinMaxScaler, PolynomialFeatures
7
from sklearn.datasets import make_blobs
8
9
# Directory of bundled data files: the "data" folder that sits one level
# above the directory containing this module.
_MODULE_DIR = os.path.dirname(__file__)
DATA_PATH = os.path.join(_MODULE_DIR, "..", "data")
10
11
12
def make_forge():
    """Build the small hand-tuned two-class "forge" toy dataset.

    Thirty points are drawn around two blob centers with a fixed seed, two
    hand-picked points are relabelled to class 0, and four hand-picked
    points are dropped, leaving 26 samples.

    Returns
    -------
    X : ndarray
        Feature matrix with one row per remaining sample.
    y : ndarray
        Class label for each row of ``X``.
    """
    X, y = make_blobs(centers=2, random_state=4, n_samples=30)
    # Flip two hand-picked samples to class 0.
    y[np.array([7, 27])] = 0
    # Use the builtin ``bool``: the ``np.bool`` alias was removed in
    # NumPy 1.24 and raises AttributeError there.
    mask = np.ones(len(X), dtype=bool)
    # Drop four hand-picked samples.
    mask[np.array([0, 1, 5, 26])] = 0
    X, y = X[mask], y[mask]
    return X, y
20
21
22
def make_wave(n_samples=100):
    """Generate a noisy one-dimensional "wave" regression dataset.

    Inputs are drawn uniformly from [-3, 3) with a fixed seed; the target is
    ``(sin(4 * x) + x + gaussian_noise) / 2``.

    Parameters
    ----------
    n_samples : int, default=100
        Number of points to generate.

    Returns
    -------
    X : ndarray of shape (n_samples, 1)
        The inputs as a single-feature column.
    y : ndarray of shape (n_samples,)
        The noisy targets.
    """
    # Fixed seed so every call with the same n_samples yields the same data.
    rnd = np.random.RandomState(42)
    x = rnd.uniform(-3, 3, size=n_samples)
    y_no_noise = np.sin(4 * x) + x
    # Noise is drawn after the inputs, from the same generator.
    y = (y_no_noise + rnd.normal(size=len(x))) / 2
    return x.reshape(-1, 1), y
28
29
30
def load_extended_boston():
    """Return the Boston housing data expanded with degree-2 features.

    The raw features are rescaled with ``MinMaxScaler`` and then expanded
    with ``PolynomialFeatures(degree=2, include_bias=False)``.

    Returns
    -------
    X : ndarray
        The scaled and polynomially expanded feature matrix.
    y : ndarray
        The unmodified ``target`` array of the loaded dataset.
    """
    # NOTE(review): ``load_boston`` was removed in scikit-learn 1.2; this
    # function needs an older scikit-learn or a replacement data source.
    boston = load_boston()
    scaled = MinMaxScaler().fit_transform(boston.data)
    X = PolynomialFeatures(degree=2, include_bias=False).fit_transform(scaled)
    return X, boston.target
37
38
39
def load_citibike():
    """Count the rows of ``citibike.csv`` per three-hour window.

    The file is read from ``DATA_PATH``. Each row is given a counter of 1,
    rows are indexed by their parsed ``starttime``, and the counter is
    summed over consecutive three-hour bins.

    Returns
    -------
    pandas.Series
        Series named ``one``, indexed by bin start time, holding the number
        of rows that fall in each bin (0 for empty bins).
    """
    trips = pd.read_csv(os.path.join(DATA_PATH, "citibike.csv"))
    trips['one'] = 1
    trips['starttime'] = pd.to_datetime(trips.starttime)
    by_start = trips.set_index("starttime")
    # Resample only the counter column: summing every CSV column just to
    # discard all but one is wasted work and can trip over non-numeric data.
    return by_start['one'].resample("3h").sum().fillna(0)
46
47
48
def make_signals():
    """Generate three noisy, non-negative source signals.

    A sine wave, a square wave and a sawtooth wave are sampled at 2000
    evenly spaced points on [0, 8], stacked as columns and perturbed with
    seeded Gaussian noise. Each column is then divided by its standard
    deviation and the whole array is shifted so its smallest entry is 0.

    Returns
    -------
    S : ndarray of shape (2000, 3)
        One signal per column, in the order sine, square, sawtooth.
    """
    # Fixed seed so the noise, and therefore the output, is reproducible.
    rng = np.random.RandomState(42)
    n_samples = 2000
    time = np.linspace(0, 8, n_samples)

    # Create the three clean signals.
    s1 = np.sin(2 * time)  # Signal 1: sinusoidal signal
    s2 = np.sign(np.sin(3 * time))  # Signal 2: square signal
    s3 = signal.sawtooth(2 * np.pi * time)  # Signal 3: sawtooth signal

    # Stack the signals column-wise and add noise.
    S = np.c_[s1, s2, s3]
    S += 0.2 * rng.normal(size=S.shape)

    # Scale each column to unit standard deviation (the mean is not removed).
    S /= S.std(axis=0)
    # Shift by the global minimum so every entry is >= 0.
    S -= S.min()
    return S
65
66