CoCalc -- stats.py

Kalman-and-Bayesian-Filters-in-Python / filterpy / stats / stats.py
⁹⁶¹²⁹ views
1
# -*- coding: utf-8 -*-
2

3
"""Copyright 2015 Roger R Labbe Jr.
4

5
FilterPy library.
6
http://github.com/rlabbe/filterpy
7

8
Documentation at:
9
https://filterpy.readthedocs.org
10

11
Supporting book at:
12
https://github.com/rlabbe/Kalman-and-Bayesian-Filters-in-Python
13

14
This is licensed under an MIT license. See the readme.MD file
15
for more information.
16
"""
17

18

19
from __future__ import (absolute_import, division, print_function,
20
                        unicode_literals)
21

22
import math
23
import numpy as np
24
import numpy.linalg as linalg
25
import matplotlib.pyplot as plt
26
import scipy.sparse as sp
27
import scipy.sparse.linalg as spln
28
import scipy.stats
29
from scipy.stats import norm
30
from matplotlib.patches import Ellipse
31
import random
32

33

34
def gaussian(x, mean, var):
35
    """returns normal distribution (pdf) for x given a Gaussian with the
36
    specified mean and variance. All must be scalars.
37

38
    gaussian (1,2,3) is equivalent to scipy.stats.norm(2,math.sqrt(3)).pdf(1)
39
    It is quite a bit faster albeit much less flexible than the latter.
40

41
    **Parameters**
42

43
    x : scalar or array-like
44
        The value for which we compute the probability
45

46
    mean : scalar
47
        Mean of the Gaussian
48

49
    var : scalar
50
        Variance of the Gaussian
51

52
    **Returns**
53

54
    probability : float
55
        probability of x for the Gaussian (mean, var). E.g. 0.101 denotes
56
        10.1%.
57

58
    **Examples**
59

60
    gaussian(8, 1, 2)
61
    gaussian([8, 7, 9], 1, 2)
62
    """
63

64
    return (np.exp((-0.5*(np.asarray(x)-mean)**2)/var) /
65
            math.sqrt(2*math.pi*var))
66

67

68
def mul (mean1, var1, mean2, var2):
69
    """ multiply Gaussians (mean1, var1) with (mean2, var2) and return the
70
    results as a tuple (mean,var).
71

72
    var1 and var2 are variances - sigma squared in the usual parlance.
73
    """
74

75
    mean = (var1*mean2 + var2*mean1) / (var1 + var2)
76
    var = 1 / (1/var1 + 1/var2)
77
    return (mean, var)
78

79

80
def add (mean1, var1, mean2, var2):
81
    """ add the Gaussians (mean1, var1) with (mean2, var2) and return the
82
    results as a tuple (mean,var).
83

84
    var1 and var2 are variances - sigma squared in the usual parlance.
85
    """
86

87
    return (mean1+mean2, var1+var2)
88

89

90
def multivariate_gaussian(x, mu, cov):
91
    """ This is designed to replace scipy.stats.multivariate_normal
92
    which is not available before version 0.14. You may either pass in a
93
    multivariate set of data:
94

95
       multivariate_gaussian (array([1,1]), array([3,4]), eye(2)*1.4)
96
       multivariate_gaussian (array([1,1,1]), array([3,4,5]), 1.4)
97

98
    or unidimensional data:
99
       multivariate_gaussian(1, 3, 1.4)
100

101
    In the multivariate case if cov is a scalar it is interpreted as eye(n)*cov
102

103
    The function gaussian() implements the 1D (univariate)case, and is much
104
    faster than this function.
105

106
    equivalent calls:
107
       multivariate_gaussian(1, 2, 3)
108
       scipy.stats.multivariate_normal(2,3).pdf(1)
109

110

111
    **Parameters**
112

113
    x : float, or np.array-like
114
       Value to compute the probability for. May be a scalar if univariate,
115
       or any type that can be converted to an np.array (list, tuple, etc).
116
       np.array is best for speed.
117

118
    mu :  float, or np.array-like
119
       mean for the Gaussian . May be a scalar if univariate,  or any type
120
       that can be converted to an np.array (list, tuple, etc).np.array is
121
       best for speed.
122

123
    cov :  float, or np.array-like
124
       Covariance for the Gaussian . May be a scalar if univariate,  or any
125
       type that can be converted to an np.array (list, tuple, etc).np.array is
126
       best for speed.
127

128
    **Returns**
129

130
    probability : float
131
        probability for x for the Gaussian (mu,cov)
132
    """
133

134
    # force all to numpy.array type
135
    x   = np.array(x, copy=False, ndmin=1)
136
    mu  = np.array(mu,copy=False, ndmin=1)
137

138
    nx = len(mu)
139
    cov = _to_cov(cov, nx)
140

141
    norm_coeff = nx*math.log(2*math.pi) + np.linalg.slogdet(cov)[1]
142

143
    err = x - mu
144
    if (sp.issparse(cov)):
145
        numerator = spln.spsolve(cov, err).T.dot(err)
146
    else:
147
        numerator = np.linalg.solve(cov, err).T.dot(err)
148

149
    return math.exp(-0.5*(norm_coeff + numerator))
150

151

152
def multivariate_multiply(m1, c1, m2, c2):
153
    """ Multiplies the two multivariate Gaussians together and returns the
154
    results as the tuple (mean, covariance).
155

156
    **example**
157

158
    m, c = multivariate_multiply([7.0, 2], [[1.0, 2.0], [2.0, 1.0]],
159
                                 [3.2, 0], [[8.0, 1.1], [1.1,8.0]])
160

161
    **Parameters**
162

163
    m1 : array-like
164
        Mean of first Gaussian. Must be convertable to an 1D array via
165
        numpy.asarray(), For example 6, [6], [6, 5], np.array([3, 4, 5, 6])
166
        are all valid.
167

168
    c1 : matrix-like
169
        Mean of first Gaussian. Must be convertable to an 2D array via
170
        numpy.asarray().
171

172
     m2 : array-like
173
        Mean of second Gaussian. Must be convertable to an 1D array via
174
        numpy.asarray(), For example 6, [6], [6, 5], np.array([3, 4, 5, 6])
175
        are all valid.
176

177
    c2 : matrix-like
178
        Mean of second Gaussian. Must be convertable to an 2D array via
179
        numpy.asarray().
180

181
    **Returns**
182

183
    m : ndarray
184
        mean of the result
185

186
    c : ndarray
187
        covariance of the result
188
    """
189

190
    C1 = np.asarray(c1)
191
    C2 = np.asarray(c2)
192
    M1 = np.asarray(m1)
193
    M2 = np.asarray(m2)
194

195
    sum_inv = np.linalg.inv(C1+C2)
196
    C3 = np.dot(C1, sum_inv).dot(C2)
197

198
    M3 = (np.dot(C2, sum_inv).dot(M1) +
199
          np.dot(C1, sum_inv).dot(M2))
200

201
    return M3, C3
202

203

204

205
def plot_gaussian(mean, variance,
206
                  mean_line=False,
207
                  xlim=None,
208
                  ylim=None,
209
                  xlabel=None,
210
                  ylabel=None):
211
    """ plots the normal distribution with the given mean and variance. x-axis
212
    contains the mean, the y-axis shows the probability.
213

214
    **parameters**
215

216
    mean_line : boolean
217
        draws a line at x=mean
218

219
    xlim, ylim: (float,float), optional
220
        specify the limits for the x or y axis as tuple (low,high).
221
        If not specified, limits will be automatically chosen to be 'nice'
222

223
    xlabel : str,optional
224
        label for the x-axis
225

226
    ylabel : str, optional
227
        label for the y-axis
228
    """
229

230
    sigma = math.sqrt(variance)
231
    n = scipy.stats.norm(mean, sigma)
232

233
    if xlim is None:
234
        min_x = n.ppf(0.001)
235
        max_x = n.ppf(0.999)
236
    else:
237
        min_x = xlim[0]
238
        max_x = xlim[1]
239
    xs = np.arange(min_x, max_x, (max_x - min_x) / 1000)
240
    plt.plot(xs,n.pdf(xs))
241
    plt.xlim((min_x, max_x))
242

243
    if ylim is not None:
244
        plt.ylim(ylim)
245

246
    if mean_line:
247
        plt.axvline(mean)
248
    if xlabel:
249
       plt.xlabel(xlabel)
250
    if ylabel:
251
       plt.ylabel(ylabel)
252

253

254
def covariance_ellipse(P, deviations=1):
255
    """ returns a tuple defining the ellipse representing the 2 dimensional
256
    covariance matrix P.
257

258
    **Parameters**
259

260
    P : nd.array shape (2,2)
261
       covariance matrix
262

263
    deviations : int (optional, default = 1)
264
       # of standard deviations. Default is 1.
265

266
    Returns (angle_radians, width_radius, height_radius)
267
    """
268

269
    U,s,v = linalg.svd(P)
270
    orientation = math.atan2(U[1,0],U[0,0])
271
    width  = deviations*math.sqrt(s[0])
272
    height = deviations*math.sqrt(s[1])
273

274
    assert width >= height
275
    return (orientation, width, height)
276

277

278
def plot_covariance_ellipse(mean, cov=None, variance = 1.0, std=None,
279
             ellipse=None, title=None, axis_equal=True,
280
             facecolor=None, edgecolor=None,
281
             fc='none', ec='#004080',
282
             alpha=1.0, xlim=None, ylim=None,
283
             ls='solid'):
284
    """ plots the covariance ellipse where
285

286
    mean is a (x,y) tuple for the mean of the covariance (center of ellipse)
287

288
    cov is a 2x2 covariance matrix.
289

290
    `variance` is the normal sigma^2 that we want to plot. If list-like,
291
    ellipses for all ellipses will be ploted. E.g. [1,2] will plot the
292
    sigma^2 = 1 and sigma^2 = 2 ellipses. Alternatively, use std for the
293
    standard deviation, in which case `variance` will be ignored.
294

295
    ellipse is a (angle,width,height) tuple containing the angle in radians,
296
    and width and height radii.
297

298
    You may provide either cov or ellipse, but not both.
299

300
    plt.show() is not called, allowing you to plot multiple things on the
301
    same figure.
302
    """
303

304
    assert cov is None or ellipse is None
305
    assert not (cov is None and ellipse is None)
306

307
    if facecolor is None:
308
        facecolor = fc
309

310
    if edgecolor is None:
311
        edgecolor = ec
312

313
    if cov is not None:
314
        ellipse = covariance_ellipse(cov)
315

316
    if axis_equal:
317
        #plt.gca().set_aspect('equal')
318
        plt.axis('equal')
319

320
    if title is not None:
321
        plt.title (title)
322

323
    compute_std = False
324
    if std is None:
325
        std = variance
326
        compute_std = True
327

328

329
    if np.isscalar(std):
330
            std = [std]
331

332
    if compute_std:
333
        std = np.sqrt(np.asarray(std))
334

335
    ax = plt.gca()
336

337
    angle = np.degrees(ellipse[0])
338
    width = ellipse[1] * 2.
339
    height = ellipse[2] * 2.
340

341
    for sd in std:
342
        e = Ellipse(xy=mean, width=sd*width, height=sd*height, angle=angle,
343
                    facecolor=facecolor,
344
                    edgecolor=edgecolor,
345
                    alpha=alpha,
346
                    lw=2, ls=ls)
347
        ax.add_patch(e)
348
    plt.scatter(mean[0], mean[1], marker='+') # mark the center
349
    if xlim is not None:
350
        ax.set_xlim(xlim)
351

352
    if ylim is not None:
353
        ax.set_ylim(ylim)
354

355

356
def norm_cdf (x_range, mu, var=1, std=None):
357
    """ computes the probability that a Gaussian distribution lies
358
    within a range of values.
359

360
    **Paramateters**
361

362
    x_range : (float, float)
363
        tuple of range to compute probability for
364

365
    mu : float
366
        mean of the Gaussian
367

368
    var : float, optional
369
        variance of the Gaussian. Ignored if std is provided
370

371
    std : float, optional
372
       standard deviation of the Gaussian. This overrides the var parameter
373

374

375
    **Returns**
376

377
    probability : float
378
        probability that Gaussian is within x_range. E.g. .1 means 10%.
379
    """
380

381
    if std is None:
382
        std = math.sqrt(var)
383
    return abs(norm.cdf(x_range[0], loc=mu, scale=std) -
384
               norm.cdf(x_range[1], loc=mu, scale=std))
385

386

387
def _is_inside_ellipse(x, y, ex, ey, orientation, width, height):
388

389
    co = np.cos(orientation)
390
    so = np.sin(orientation)
391

392
    xx = x*co + y*so
393
    yy = y*co - x*so
394

395
    return (xx / width)**2 + (yy / height)**2 <= 1.
396

397

398
    return ((x-ex)*co - (y-ey)*so)**2/width**2 + \
399
           ((x-ex)*so + (y-ey)*co)**2/height**2 <= 1
400

401
def _to_cov(x,n):
402
    """ If x is a scalar, returns a covariance matrix generated from it
403
    as the identity matrix multiplied by x. The dimension will be nxn.
404
    If x is already a numpy array then it is returned unchanged.
405
    """
406
    try:
407
        x.shape
408
        if type(x) != np.ndarray:
409
            x = np.asarray(x)[0]
410
        return x
411
    except:
412
        return np.eye(n) * x
413

414

415
def _do_plot_test():
416

417
    from numpy.random import multivariate_normal
418
    p = np.array([[32, 15],[15., 40.]])
419

420
    x,y = multivariate_normal(mean=(0,0), cov=p, size=5000).T
421
    sd = 2
422
    a,w,h = covariance_ellipse(p,sd)
423
    print (np.degrees(a), w, h)
424

425
    count = 0
426
    color=[]
427
    for i in range(len(x)):
428
        if _is_inside_ellipse(x[i], y[i], 0, 0, a, w, h):
429
            color.append('b')
430
            count += 1
431
        else:
432
            color.append('r')
433
    plt.scatter(x,y,alpha=0.2, c=color)
434

435

436
    plt.axis('equal')
437

438
    plot_covariance_ellipse(mean=(0., 0.),
439
                            cov = p,
440
                            std=sd,
441
                            facecolor='none')
442

443
    print (count / len(x))
444

445

446
def plot_std_vs_var():
447
    plt.figure()
448
    x = (0,0)
449
    P = np.array([[3,1],[1,3]])
450
    plot_covariance_ellipse(x, P, std=[1,2,3], facecolor='g', alpha=.2)
451
    plot_covariance_ellipse(x, P, variance=[1,2,3], facecolor='r', alpha=.5)
452

453

454
def rand_student_t(df, mu=0, std=1):
455
    """return random number distributed by student's t distribution with
456
    `df` degrees of freedom with the specified mean and standard deviation.
457
    """
458
    x = random.gauss(0, std)
459
    y = 2.0*random.gammavariate(0.5*df, 2.0)
460
    return x / (math.sqrt(y/df)) + mu
461

462

463
if __name__ == '__main__':
464
    plot_std_vs_var()
465
    plt.figure()
466

467
    _do_plot_test()
468

469
    #test_gaussian()
470

471
    # test conversion of scalar to covariance matrix
472
    x  = multivariate_gaussian(np.array([1,1]), np.array([3,4]), np.eye(2)*1.4)
473
    x2 = multivariate_gaussian(np.array([1,1]), np.array([3,4]), 1.4)
474
    assert x == x2
475

476
    # test univarate case
477
    rv = norm(loc = 1., scale = np.sqrt(2.3))
478
    x2 = multivariate_gaussian(1.2, 1., 2.3)
479
    x3 = gaussian(1.2, 1., 2.3)
480

481
    assert rv.pdf(1.2) == x2
482
    assert abs(x2- x3) < 0.00000001
483

484
    cov = np.array([[1.0, 1.0],
485
                    [1.0, 1.1]])
486

487
    plt.figure()
488
    P = np.array([[2,0],[0,2]])
489
    plot_covariance_ellipse((2,7), cov=cov, variance=[1,2], facecolor='g',
490
                            title='my title', alpha=.2, ls='dashed')
491
    plt.show()
492

493
    print("all tests passed")
494

495
Product

Resources

Company