1
2 """
3 Created on Tue Jul 1 16:30:38 2014
4
5 Copyright (c) 2013-2014, CEA/DSV/I2BM/Neurospin. All rights reserved.
6
7 @author: Tommy Löfstedt
8 @email: lofstedt.tommy@gmail.com
9 @license: BSD 3-clause.
10 """
11 import numpy as np
12
13 from . import consts
14 from . import deprecated
15
16 __all__ = ["multivariate_normal", "sensitivity", "precision", "specificity",
17 "npv", "F_score", "fleiss_kappa"]
21 """Generates n random vectors from the multivariate normal distribution
22 with mean mu and covariance matrix Sigma.
23
24 This function is faster (roughly 11 times faster for a 600-by-4000 matrix
25 on my computer) than numpy.random.multivariate_normal. This method differs
26 from numpy's function in that it uses the Cholesky factorisation. Note
27 that this requires the covariance matrix to be positive definite, as
28 opposed to positive semi-definite in the numpy case.
29
30 See details at: https://en.wikipedia.org/wiki/
31 Multivariate_normal_distribution#Drawing_values_from_the_distribution
32
33 Parameters
34 ----------
35 mu : Numpy array, shape (n, 1). The mean vector.
36
37 Sigma : Numpy array, shape (p, p). The covariance matrix.
38
39 n : Integer. The number of multivariate normal vectors to generate.
40
41 Example
42 -------
43 >>> import parsimony.utils.stats as stats
44 >>> import numpy as np
45 >>> np.random.seed(42)
46 >>> n, p = 50000, 100
47 >>> mu = np.random.rand(p, 1)
48 >>> alpha = 0.01
49 >>> Sigma = alpha * np.random.rand(p, p) + (1 - alpha) * np.eye(p, p)
50 >>> M = stats.multivariate_normal(mu, Sigma, n)
51 >>> mean = np.mean(M, axis=0)
52 >>> S = np.dot((M - mean).T, (M - mean)) * (1.0 / float(n - 1))
53 >>> round(np.linalg.norm(Sigma - S), 14)
54 0.51886218849785
55 """
56
57 n = max(1, int(n))
58 mu = np.array(mu)
59 if not isinstance(Sigma, np.ndarray):
60 Sigma = np.array(Sigma)
61
62
63 if len(Sigma.shape) != 2 or Sigma.shape[0] != Sigma.shape[1]:
64 raise ValueError("Sigma must be a square matrix.")
65 p = Sigma.shape[0]
66 if not (len(mu.shape) == 1 or \
67 (len(mu.shape) == 2 and min(mu.shape) == 1)):
68 raise ValueError("The mean 'mu' must be 1 dimensional or " \
69 "p-by-1 dimensional")
70 mu = mu.reshape((max(mu.shape),))
71 if mu.shape[0] != Sigma.shape[0]:
72 raise ValueError("Mu and Sigma must have matching dimensions.")
73
74 A = np.linalg.cholesky(Sigma)
75 z = np.random.randn(p, n)
76
77
78 rand = mu[:, np.newaxis] + np.dot(A, z)
79
80 return rand.T
81
84 """A test's ability to identify a condition correctly.
85
86 Also called true positive rate or recall.
87
88 Parameters
89 ----------
90 cond : Numpy array, boolean or 0/1 integer. The "true", known condition.
91
92 test : Numpy array, boolean or 0/1 integer. The estimated outcome.
93
94 Example
95 -------
96 >>> import parsimony.utils.stats as stats
97 >>> import numpy as np
98 >>> np.random.seed(42)
99 >>> p = 2030
100 >>> cond = np.zeros((p, 1))
101 >>> test = np.zeros((p, 1))
102 >>> stats.sensitivity(cond, test)
103 1.0
104 >>> stats.sensitivity(cond, np.logical_not(test))
105 1.0
106 >>> cond[:30] = 1.0
107 >>> test[:30] = 1.0
108 >>> test[:10] = 0.0
109 >>> test[-180:] = 1.0
110 >>> round(stats.sensitivity(cond, test), 2)
111 0.67
112 """
113 true_pos = np.logical_and((cond == 1), (test == 1))
114 false_neg = np.logical_and((cond == 1), (test == 0))
115
116 TP = float(true_pos.sum())
117 FN = float(false_neg.sum())
118
119 if FN > 0:
120 value = TP / (TP + FN)
121 else:
122 value = 1.0
123
124 return value
125
128 """A test's ability to exclude a condition correctly.
129
130 Also called true negative rate.
131
132 Parameters
133 ----------
134 cond : Numpy array, boolean or 0/1 integer. The "true", known condition.
135
136 test : Numpy array, boolean or 0/1 integer. The estimated outcome.
137
138 Example
139 -------
140 >>> import parsimony.utils.stats as stats
141 >>> import numpy as np
142 >>> np.random.seed(42)
143 >>> p = 2030
144 >>> cond = np.zeros((p, 1))
145 >>> test = np.zeros((p, 1))
146 >>> stats.specificity(cond, test)
147 1.0
148 >>> stats.specificity(cond, np.logical_not(test))
149 0.0
150 >>> cond[:30] = 1.0
151 >>> test[:30] = 1.0
152 >>> test[:10] = 0.0
153 >>> test[-180:] = 1.0
154 >>> round(stats.specificity(cond, test), 2)
155 0.91
156 """
157 true_neg = np.logical_and((cond == 0), (test == 0))
158 false_pos = np.logical_and((cond == 0), (test == 1))
159
160 TN = float(true_neg.sum())
161 FP = float(false_pos.sum())
162
163 if FP > 0:
164 value = TN / (TN + FP)
165 else:
166 value = 1.0
167
168 return value
169
170
def ppv(cond, test):
    """A test's ability to correctly identify positive outcomes.

    Short for positive predictive value. Also called precision.

    Parameters
    ----------
    cond : Numpy array, boolean or 0/1 integer. The "true", known condition.

    test : Numpy array, boolean or 0/1 integer. The estimated outcome.

    Example
    -------
    >>> import parsimony.utils.stats as stats
    >>> import numpy as np
    >>> np.random.seed(42)
    >>> p = 2030
    >>> cond = np.zeros((p, 1))
    >>> test = np.zeros((p, 1))
    >>> stats.ppv(cond, test)
    1.0
    >>> stats.ppv(cond, np.logical_not(test))
    0.0
    >>> cond[:30] = 1.0
    >>> test[:30] = 1.0
    >>> test[:10] = 0.0
    >>> test[-180:] = 1.0
    >>> round(stats.ppv(cond, test), 2)
    0.1
    """
    true_pos = np.logical_and((cond == 1), (test == 1))
    false_pos = np.logical_and((cond == 0), (test == 1))

    TP = float(true_pos.sum())
    FP = float(false_pos.sum())

    # By convention, no false positives means perfect precision.
    if FP > 0:
        value = TP / (TP + FP)
    else:
        value = 1.0

    return value
213
214
@deprecated("ppv")
def precision(cond, test):
    """Deprecated alias for ppv, the positive predictive value."""
    return ppv(cond, test)
218
219
def npv(cond, test):
    """A test's ability to correctly identify negative outcomes.

    The negative predictive value, NPV.

    Parameters
    ----------
    cond : Numpy array, boolean or 0/1 integer. The "true", known condition.

    test : Numpy array, boolean or 0/1 integer. The estimated outcome.

    Example
    -------
    >>> import parsimony.utils.stats as stats
    >>> import numpy as np
    >>> np.random.seed(42)
    >>> p = 2030
    >>> cond = np.zeros((p, 1))
    >>> test = np.zeros((p, 1))
    >>> stats.npv(cond, test)
    1.0
    >>> stats.npv(cond, np.logical_not(test))
    1.0
    >>> cond[:30] = 1.0
    >>> test[:30] = 1.0
    >>> test[:10] = 0.0
    >>> test[-180:] = 1.0
    >>> round(stats.npv(cond, test), 3)
    0.995
    """
    true_neg = np.logical_and((cond == 0), (test == 0))
    false_neg = np.logical_and((cond == 1), (test == 0))

    TN = float(true_neg.sum())
    FN = float(false_neg.sum())

    # By convention, no false negatives means a perfect NPV.
    if FN > 0:
        value = TN / (TN + FN)
    else:
        value = 1.0

    return value
262
265 """The degree of correctly estimated outcomes.
266
267 Parameters
268 ----------
269 cond : Numpy array, boolean or 0/1 integer. The "true", known condition.
270
271 test : Numpy array, boolean or 0/1 integer. The estimated outcome.
272
273 Example
274 -------
275 >>> import parsimony.utils.stats as stats
276 >>> import numpy as np
277 >>> np.random.seed(42)
278 >>> p = 2030
279 >>> cond = np.zeros((p, 1))
280 >>> test = np.zeros((p, 1))
281 >>> stats.accuracy(cond, test)
282 1.0
283 >>> stats.accuracy(cond, np.logical_not(test))
284 0.0
285 >>> cond[:30] = 1.0
286 >>> test[:30] = 1.0
287 >>> test[:10] = 0.0
288 >>> test[-180:] = 1.0
289 >>> round(stats.accuracy(cond, test), 2)
290 0.91
291 """
292 true_pos = np.logical_and((cond == 1), (test == 1))
293 true_neg = np.logical_and((cond == 0), (test == 0))
294
295 TP = float(true_pos.sum())
296 TN = float(true_neg.sum())
297
298 n = np.prod(cond.shape)
299
300 value = (TP + TN) / float(n)
301
302 return value
303
306 """A measure of a test's accuracy by a weighted average of the precision
307 and sensitivity.
308
309 Also called the harmonic mean of precision and sensitivity.
310
311 Parameters
312 ----------
313 cond : Numpy array, boolean or 0/1 integer. The "true", known condition.
314
315 test : Numpy array, boolean or 0/1 integer. The estimated outcome.
316
317 Example
318 -------
319 >>> import parsimony.utils.stats as stats
320 >>> import numpy as np
321 >>> np.random.seed(42)
322 >>> p = 2030
323 >>> cond = np.zeros((p, 1))
324 >>> test = np.zeros((p, 1))
325 >>> stats.F_score(cond, test)
326 1.0
327 >>> stats.F_score(cond, np.logical_not(test))
328 0.0
329 >>> cond[:30] = 1.0
330 >>> test[:30] = 1.0
331 >>> test[:10] = 0.0
332 >>> test[-180:] = 1.0
333 >>> round(stats.F_score(cond, test), 2)
334 0.17
335 """
336 PR = precision(cond, test)
337 RE = sensitivity(cond, test)
338
339 value = 2.0 * PR * RE / (PR + RE)
340
341 return value
342
343
def alpha(cond, test):
    """False positive rate or type I error.

    Parameters
    ----------
    cond : Numpy array, boolean or 0/1 integer. The "true", known condition.

    test : Numpy array, boolean or 0/1 integer. The estimated outcome.

    Example
    -------
    >>> import parsimony.utils.stats as stats
    >>> import numpy as np
    >>> np.random.seed(42)
    >>> p = 2030
    >>> cond = np.zeros((p, 1))
    >>> test = np.zeros((p, 1))
    >>> stats.alpha(cond, test)
    0.0
    >>> stats.alpha(cond, np.logical_not(test))
    1.0
    >>> cond[:30] = 1.0
    >>> test[:30] = 1.0
    >>> test[:10] = 0.0
    >>> test[-180:] = 1.0
    >>> round(stats.alpha(cond, test), 2)
    0.09
    """
    # The false positive rate is the complement of the true negative rate.
    return 1.0 - specificity(cond, test)
373
374
def beta(cond, test):
    """False negative rate or type II error.

    Parameters
    ----------
    cond : Numpy array, boolean or 0/1 integer. The "true", known condition.

    test : Numpy array, boolean or 0/1 integer. The estimated outcome.

    Example
    -------
    >>> import parsimony.utils.stats as stats
    >>> import numpy as np
    >>> np.random.seed(42)
    >>> p = 2030
    >>> cond = np.zeros((p, 1))
    >>> test = np.zeros((p, 1))
    >>> stats.beta(cond, test)
    0.0
    >>> stats.beta(cond, np.logical_not(test))
    0.0
    >>> cond[:30] = 1.0
    >>> test[:30] = 1.0
    >>> test[:10] = 0.0
    >>> test[-180:] = 1.0
    >>> round(stats.beta(cond, test), 2)
    0.33
    """
    # The false negative rate is the complement of the true positive rate.
    return 1.0 - sensitivity(cond, test)
404
405
def power(cond, test):
    """Statistical power for a test. The probability that it correctly rejects
    the null hypothesis when the null hypothesis is false.

    Parameters
    ----------
    cond : Numpy array, boolean or 0/1 integer. The "true", known condition.

    test : Numpy array, boolean or 0/1 integer. The estimated outcome.

    Example
    -------
    >>> import parsimony.utils.stats as stats
    >>> import numpy as np
    >>> np.random.seed(42)
    >>> p = 2030
    >>> cond = np.zeros((p, 1))
    >>> test = np.zeros((p, 1))
    >>> stats.power(cond, test)
    1.0
    >>> stats.power(cond, np.logical_not(test))
    1.0
    >>> cond[:30] = 1.0
    >>> test[:30] = 1.0
    >>> test[:10] = 0.0
    >>> test[-180:] = 1.0
    >>> round(stats.power(cond, test), 2)
    0.67
    """
    # Power is the complement of the type II error rate.
    return 1.0 - beta(cond, test)
436
439 """Assesses the value of performing a diagnostic test for the positive
440 outcome.
441
442 Parameters
443 ----------
444 cond : Numpy array, boolean or 0/1 integer. The "true", known condition.
445
446 test : Numpy array, boolean or 0/1 integer. The estimated outcome.
447
448 Example
449 -------
450 >>> import parsimony.utils.stats as stats
451 >>> import numpy as np
452 >>> np.random.seed(42)
453 >>> p = 2030
454 >>> cond = np.zeros((p, 1))
455 >>> test = np.zeros((p, 1))
456 >>> stats.likelihood_ratio_positive(cond, test)
457 inf
458 >>> stats.likelihood_ratio_positive(cond, np.logical_not(test))
459 1.0
460 >>> cond[:30] = 1.0
461 >>> test[:30] = 1.0
462 >>> test[:10] = 0.0
463 >>> test[-180:] = 1.0
464 >>> round(stats.likelihood_ratio_positive(cond, test), 1)
465 7.4
466 """
467 sens = sensitivity(cond, test)
468 spec = specificity(cond, test)
469
470 if spec == 1.0:
471 return np.inf
472 else:
473 return sens / (1.0 - spec)
474
477 """Assesses the value of performing a diagnostic test for the negative
478 outcome.
479
480 Parameters
481 ----------
482 cond : Numpy array, boolean or 0/1 integer. The "true", known condition.
483
484 test : Numpy array, boolean or 0/1 integer. The estimated outcome.
485
486 Example
487 -------
488 >>> import parsimony.utils.stats as stats
489 >>> import numpy as np
490 >>> np.random.seed(42)
491 >>> p = 2030
492 >>> cond = np.zeros((p, 1))
493 >>> test = np.zeros((p, 1))
494 >>> stats.likelihood_ratio_negative(cond, test)
495 0.0
496 >>> stats.likelihood_ratio_negative(cond, np.logical_not(test))
497 inf
498 >>> cond[:30] = 1.0
499 >>> test[:30] = 1.0
500 >>> test[:10] = 0.0
501 >>> test[-180:] = 1.0
502 >>> round(stats.likelihood_ratio_negative(cond, test), 2)
503 0.37
504 """
505 sens = sensitivity(cond, test)
506 spec = specificity(cond, test)
507
508 if spec == 0.0:
509 return np.inf
510 else:
511 return (1.0 - sens) / spec
512
515 """Computes Fleiss' kappa for a set of variables classified into k
516 categories by a number of different raters.
517
518 W is a matrix with shape (variables, raters) with k categories between
519 0, ..., k - 1.
520 """
521 N, n = W.shape
522 if n <= 1:
523 raise ValueError("At least two ratings are needed")
524 A = np.zeros((N, k))
525 Nn = N * n
526 p = [0.0] * k
527 for j in xrange(k):
528 A[:, j] = np.sum(W == j, axis=1)
529
530 p[j] = np.sum(A[:, j]) / float(Nn)
531
532 P = [0.0] * N
533 for i in xrange(N):
534 for j in xrange(k):
535 P[i] += A[i, j] ** 2.0
536 P[i] -= n
537 P[i] /= float(n * (n - 1))
538
539 P_ = sum(P) / float(N)
540 Pe = sum([pj ** 2.0 for pj in p])
541
542 if abs(Pe - 1) < consts.TOLERANCE:
543 kappa = 1.0
544 else:
545 kappa = (P_ - Pe) / (1.0 - Pe)
546 if kappa > 1.0:
547 kappa = 1.0
548
549 return kappa
550