Source Code for Module parsimony.utils.stats

# -*- coding: utf-8 -*-
"""
Created on Tue Jul  1 16:30:38 2014

Copyright (c) 2013-2014, CEA/DSV/I2BM/Neurospin. All rights reserved.

@author:  Tommy Löfstedt
@email:   lofstedt.tommy@gmail.com
@license: BSD 3-clause.
"""
import numpy as np

from . import consts
from . import deprecated

__all__ = ["multivariate_normal", "sensitivity", "specificity", "ppv",
           "precision", "npv", "accuracy", "F_score", "alpha", "beta",
           "power", "likelihood_ratio_positive", "likelihood_ratio_negative",
           "fleiss_kappa"]

def multivariate_normal(mu, Sigma, n=1):
    """Generates n random vectors from the multivariate normal distribution
    with mean mu and covariance matrix Sigma.

    This function is faster (roughly 11 times faster for a 600-by-4000 matrix
    on my computer) than numpy.random.multivariate_normal. This method differs
    from numpy's function in that it uses the Cholesky factorisation. Note
    that this requires the covariance matrix to be positive definite, as
    opposed to positive semi-definite in the numpy case.

    See details at: https://en.wikipedia.org/wiki/
    Multivariate_normal_distribution#Drawing_values_from_the_distribution

    Parameters
    ----------
    mu : Numpy array, shape (p, 1). The mean vector.

    Sigma : Numpy array, shape (p, p). The covariance matrix.

    n : Integer. The number of multivariate normal vectors to generate.

    Example
    -------
    >>> import parsimony.utils.stats as stats
    >>> import numpy as np
    >>> np.random.seed(42)
    >>> n, p = 50000, 100
    >>> mu = np.random.rand(p, 1)
    >>> alpha = 0.01
    >>> Sigma = alpha * np.random.rand(p, p) + (1 - alpha) * np.eye(p, p)
    >>> M = stats.multivariate_normal(mu, Sigma, n)
    >>> mean = np.mean(M, axis=0)
    >>> S = np.dot((M - mean).T, (M - mean)) * (1.0 / float(n - 1))
    >>> round(np.linalg.norm(Sigma - S), 14)
    0.51886218849785
    """
    # Type check.
    n = max(1, int(n))
    mu = np.array(mu)
    if not isinstance(Sigma, np.ndarray):
        Sigma = np.array(Sigma)

    # Check input dimensions.
    if len(Sigma.shape) != 2 or Sigma.shape[0] != Sigma.shape[1]:
        raise ValueError("Sigma must be a square matrix.")
    p = Sigma.shape[0]
    if not (len(mu.shape) == 1
            or (len(mu.shape) == 2 and min(mu.shape) == 1)):
        raise ValueError("The mean 'mu' must be 1-dimensional or "
                         "p-by-1 dimensional.")
    mu = mu.reshape((max(mu.shape),))
    if mu.shape[0] != Sigma.shape[0]:
        raise ValueError("mu and Sigma must have matching dimensions.")

    A = np.linalg.cholesky(Sigma)  # Sigma = A.A'
    z = np.random.randn(p, n)  # Independent standard normal vectors.

    # Affine transformation property: mu + A.z is N(mu, Sigma).
    rand = mu[:, np.newaxis] + np.dot(A, z)

    return rand.T

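The docstring above claims a speed advantage over numpy.random.multivariate_normal. Here is a minimal timing sketch to probe that claim on your own machine (an editor's addition, not part of the original module; it symmetrises the random covariance so it is a valid input for both generators, and absolute timings depend on your BLAS):

    >>> import timeit
    >>> import numpy as np
    >>> import parsimony.utils.stats as stats
    >>> np.random.seed(42)
    >>> p = 100
    >>> mu = np.zeros(p)
    >>> R = np.random.rand(p, p)
    >>> Sigma = 0.01 * (R + R.T) / 2.0 + 0.99 * np.eye(p)  # Symmetric, PD.
    >>> t_numpy = timeit.timeit(
    ...     lambda: np.random.multivariate_normal(mu, Sigma, 1000), number=10)
    >>> t_chol = timeit.timeit(
    ...     lambda: stats.multivariate_normal(mu, Sigma, 1000), number=10)
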
def sensitivity(cond, test):
    """A test's ability to identify a condition correctly.

    Also called true positive rate or recall.

    Parameters
    ----------
    cond : Numpy array, boolean or 0/1 integer. The "true", known condition.

    test : Numpy array, boolean or 0/1 integer. The estimated outcome.

    Example
    -------
    >>> import parsimony.utils.stats as stats
    >>> import numpy as np
    >>> np.random.seed(42)
    >>> p = 2030
    >>> cond = np.zeros((p, 1))
    >>> test = np.zeros((p, 1))
    >>> stats.sensitivity(cond, test)
    1.0
    >>> stats.sensitivity(cond, np.logical_not(test))
    1.0
    >>> cond[:30] = 1.0
    >>> test[:30] = 1.0
    >>> test[:10] = 0.0
    >>> test[-180:] = 1.0
    >>> round(stats.sensitivity(cond, test), 2)
    0.67
    """
    true_pos = np.logical_and((cond == 1), (test == 1))
    false_neg = np.logical_and((cond == 1), (test == 0))

    TP = float(true_pos.sum())
    FN = float(false_neg.sum())

    if FN > 0:
        value = TP / (TP + FN)
    else:
        value = 1.0  # TODO: Is this really correct?

    return value

def specificity(cond, test):
    """A test's ability to exclude a condition correctly.

    Also called true negative rate.

    Parameters
    ----------
    cond : Numpy array, boolean or 0/1 integer. The "true", known condition.

    test : Numpy array, boolean or 0/1 integer. The estimated outcome.

    Example
    -------
    >>> import parsimony.utils.stats as stats
    >>> import numpy as np
    >>> np.random.seed(42)
    >>> p = 2030
    >>> cond = np.zeros((p, 1))
    >>> test = np.zeros((p, 1))
    >>> stats.specificity(cond, test)
    1.0
    >>> stats.specificity(cond, np.logical_not(test))
    0.0
    >>> cond[:30] = 1.0
    >>> test[:30] = 1.0
    >>> test[:10] = 0.0
    >>> test[-180:] = 1.0
    >>> round(stats.specificity(cond, test), 2)
    0.91
    """
    true_neg = np.logical_and((cond == 0), (test == 0))
    false_pos = np.logical_and((cond == 0), (test == 1))

    TN = float(true_neg.sum())
    FP = float(false_pos.sum())

    if FP > 0:
        value = TN / (TN + FP)
    else:
        value = 1.0

    return value

def ppv(cond, test):
    """A test's ability to correctly identify positive outcomes.

    Short for positive predictive value. Also called precision.

    Parameters
    ----------
    cond : Numpy array, boolean or 0/1 integer. The "true", known condition.

    test : Numpy array, boolean or 0/1 integer. The estimated outcome.

    Example
    -------
    >>> import parsimony.utils.stats as stats
    >>> import numpy as np
    >>> np.random.seed(42)
    >>> p = 2030
    >>> cond = np.zeros((p, 1))
    >>> test = np.zeros((p, 1))
    >>> stats.ppv(cond, test)
    1.0
    >>> stats.ppv(cond, np.logical_not(test))
    0.0
    >>> cond[:30] = 1.0
    >>> test[:30] = 1.0
    >>> test[:10] = 0.0
    >>> test[-180:] = 1.0
    >>> round(stats.ppv(cond, test), 2)
    0.1
    """
    true_pos = np.logical_and((cond == 1), (test == 1))
    false_pos = np.logical_and((cond == 0), (test == 1))

    TP = float(true_pos.sum())
    FP = float(false_pos.sum())

    if FP > 0:
        value = TP / (TP + FP)
    else:
        value = 1.0

    return value

@deprecated("ppv")
def precision(cond, test):
    """Deprecated alias of ppv."""
    return ppv(cond, test)

def npv(cond, test):
    """A test's ability to correctly identify negative outcomes.

    The negative predictive value, NPV.

    Parameters
    ----------
    cond : Numpy array, boolean or 0/1 integer. The "true", known condition.

    test : Numpy array, boolean or 0/1 integer. The estimated outcome.

    Example
    -------
    >>> import parsimony.utils.stats as stats
    >>> import numpy as np
    >>> np.random.seed(42)
    >>> p = 2030
    >>> cond = np.zeros((p, 1))
    >>> test = np.zeros((p, 1))
    >>> stats.npv(cond, test)
    1.0
    >>> stats.npv(cond, np.logical_not(test))
    1.0
    >>> cond[:30] = 1.0
    >>> test[:30] = 1.0
    >>> test[:10] = 0.0
    >>> test[-180:] = 1.0
    >>> round(stats.npv(cond, test), 3)
    0.995
    """
    true_neg = np.logical_and((cond == 0), (test == 0))
    false_neg = np.logical_and((cond == 1), (test == 0))

    TN = float(true_neg.sum())
    FN = float(false_neg.sum())

    if FN > 0:
        value = TN / (TN + FN)
    else:
        value = 1.0

    return value

def accuracy(cond, test):
    """The fraction of outcomes, positive and negative, that are correctly
    estimated.

    Parameters
    ----------
    cond : Numpy array, boolean or 0/1 integer. The "true", known condition.

    test : Numpy array, boolean or 0/1 integer. The estimated outcome.

    Example
    -------
    >>> import parsimony.utils.stats as stats
    >>> import numpy as np
    >>> np.random.seed(42)
    >>> p = 2030
    >>> cond = np.zeros((p, 1))
    >>> test = np.zeros((p, 1))
    >>> stats.accuracy(cond, test)
    1.0
    >>> stats.accuracy(cond, np.logical_not(test))
    0.0
    >>> cond[:30] = 1.0
    >>> test[:30] = 1.0
    >>> test[:10] = 0.0
    >>> test[-180:] = 1.0
    >>> round(stats.accuracy(cond, test), 2)
    0.91
    """
    true_pos = np.logical_and((cond == 1), (test == 1))
    true_neg = np.logical_and((cond == 0), (test == 0))

    TP = float(true_pos.sum())
    TN = float(true_neg.sum())

    n = np.prod(cond.shape)  # Total number of outcomes.

    value = (TP + TN) / float(n)

    return value

def F_score(cond, test):
    """A measure of a test's accuracy: the harmonic mean of the precision
    and the sensitivity.

    Also known as the F1 score.

    Parameters
    ----------
    cond : Numpy array, boolean or 0/1 integer. The "true", known condition.

    test : Numpy array, boolean or 0/1 integer. The estimated outcome.

    Example
    -------
    >>> import parsimony.utils.stats as stats
    >>> import numpy as np
    >>> np.random.seed(42)
    >>> p = 2030
    >>> cond = np.zeros((p, 1))
    >>> test = np.zeros((p, 1))
    >>> stats.F_score(cond, test)
    1.0
    >>> stats.F_score(cond, np.logical_not(test))
    0.0
    >>> cond[:30] = 1.0
    >>> test[:30] = 1.0
    >>> test[:10] = 0.0
    >>> test[-180:] = 1.0
    >>> round(stats.F_score(cond, test), 2)
    0.17
    """
    PR = ppv(cond, test)  # Precision; ppv avoids the deprecation warning.
    RE = sensitivity(cond, test)  # Recall.

    value = 2.0 * PR * RE / (PR + RE)

    return value

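Equivalently, the F-score can be computed directly from the confusion-matrix counts as 2*TP / (2*TP + FP + FN). A quick check against the doctest above, where TP = 20, FP = 180 and FN = 10 (an editor's note, not from the original module):

    >>> round(2.0 * 20 / (2.0 * 20 + 180 + 10), 2)
    0.17
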
def alpha(cond, test):
    """False positive rate or type I error.

    Parameters
    ----------
    cond : Numpy array, boolean or 0/1 integer. The "true", known condition.

    test : Numpy array, boolean or 0/1 integer. The estimated outcome.

    Example
    -------
    >>> import parsimony.utils.stats as stats
    >>> import numpy as np
    >>> np.random.seed(42)
    >>> p = 2030
    >>> cond = np.zeros((p, 1))
    >>> test = np.zeros((p, 1))
    >>> stats.alpha(cond, test)
    0.0
    >>> stats.alpha(cond, np.logical_not(test))
    1.0
    >>> cond[:30] = 1.0
    >>> test[:30] = 1.0
    >>> test[:10] = 0.0
    >>> test[-180:] = 1.0
    >>> round(stats.alpha(cond, test), 2)
    0.09
    """
    return 1.0 - specificity(cond, test)

def beta(cond, test):
    """False negative rate or type II error.

    Parameters
    ----------
    cond : Numpy array, boolean or 0/1 integer. The "true", known condition.

    test : Numpy array, boolean or 0/1 integer. The estimated outcome.

    Example
    -------
    >>> import parsimony.utils.stats as stats
    >>> import numpy as np
    >>> np.random.seed(42)
    >>> p = 2030
    >>> cond = np.zeros((p, 1))
    >>> test = np.zeros((p, 1))
    >>> stats.beta(cond, test)
    0.0
    >>> stats.beta(cond, np.logical_not(test))
    0.0
    >>> cond[:30] = 1.0
    >>> test[:30] = 1.0
    >>> test[:10] = 0.0
    >>> test[-180:] = 1.0
    >>> round(stats.beta(cond, test), 2)
    0.33
    """
    return 1.0 - sensitivity(cond, test)

def power(cond, test):
    """Statistical power for a test: the probability that it correctly
    rejects the null hypothesis when the null hypothesis is false.

    Parameters
    ----------
    cond : Numpy array, boolean or 0/1 integer. The "true", known condition.

    test : Numpy array, boolean or 0/1 integer. The estimated outcome.

    Example
    -------
    >>> import parsimony.utils.stats as stats
    >>> import numpy as np
    >>> np.random.seed(42)
    >>> p = 2030
    >>> cond = np.zeros((p, 1))
    >>> test = np.zeros((p, 1))
    >>> stats.power(cond, test)
    1.0
    >>> stats.power(cond, np.logical_not(test))
    1.0
    >>> cond[:30] = 1.0
    >>> test[:30] = 1.0
    >>> test[:10] = 0.0
    >>> test[-180:] = 1.0
    >>> round(stats.power(cond, test), 2)
    0.67
    """
    return 1.0 - beta(cond, test)

def likelihood_ratio_positive(cond, test):
    """Assesses the value of performing a diagnostic test for the positive
    outcome.

    Parameters
    ----------
    cond : Numpy array, boolean or 0/1 integer. The "true", known condition.

    test : Numpy array, boolean or 0/1 integer. The estimated outcome.

    Example
    -------
    >>> import parsimony.utils.stats as stats
    >>> import numpy as np
    >>> np.random.seed(42)
    >>> p = 2030
    >>> cond = np.zeros((p, 1))
    >>> test = np.zeros((p, 1))
    >>> stats.likelihood_ratio_positive(cond, test)
    inf
    >>> stats.likelihood_ratio_positive(cond, np.logical_not(test))
    1.0
    >>> cond[:30] = 1.0
    >>> test[:30] = 1.0
    >>> test[:10] = 0.0
    >>> test[-180:] = 1.0
    >>> round(stats.likelihood_ratio_positive(cond, test), 1)
    7.4
    """
    sens = sensitivity(cond, test)
    spec = specificity(cond, test)

    if spec == 1.0:
        return np.inf
    else:
        return sens / (1.0 - spec)

def likelihood_ratio_negative(cond, test):
    """Assesses the value of performing a diagnostic test for the negative
    outcome.

    Parameters
    ----------
    cond : Numpy array, boolean or 0/1 integer. The "true", known condition.

    test : Numpy array, boolean or 0/1 integer. The estimated outcome.

    Example
    -------
    >>> import parsimony.utils.stats as stats
    >>> import numpy as np
    >>> np.random.seed(42)
    >>> p = 2030
    >>> cond = np.zeros((p, 1))
    >>> test = np.zeros((p, 1))
    >>> stats.likelihood_ratio_negative(cond, test)
    0.0
    >>> stats.likelihood_ratio_negative(cond, np.logical_not(test))
    inf
    >>> cond[:30] = 1.0
    >>> test[:30] = 1.0
    >>> test[:10] = 0.0
    >>> test[-180:] = 1.0
    >>> round(stats.likelihood_ratio_negative(cond, test), 2)
    0.37
    """
    sens = sensitivity(cond, test)
    spec = specificity(cond, test)

    if spec == 0.0:
        return np.inf
    else:
        return (1.0 - sens) / spec

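All of the measures above are simple ratios of the four confusion-matrix counts TP, FP, TN and FN. A deterministic cross-check, reusing the cond/test construction from the doctests (an editor's sketch, not part of the original module):

    >>> import numpy as np
    >>> import parsimony.utils.stats as stats
    >>> p = 2030
    >>> cond = np.zeros((p, 1))
    >>> test = np.zeros((p, 1))
    >>> cond[:30] = 1.0
    >>> test[:30] = 1.0
    >>> test[:10] = 0.0
    >>> test[-180:] = 1.0
    >>> TP = float(np.logical_and(cond == 1, test == 1).sum())  # 20
    >>> FP = float(np.logical_and(cond == 0, test == 1).sum())  # 180
    >>> FN = float(np.logical_and(cond == 1, test == 0).sum())  # 10
    >>> TN = float(np.logical_and(cond == 0, test == 0).sum())  # 1820
    >>> stats.sensitivity(cond, test) == TP / (TP + FN)
    True
    >>> stats.ppv(cond, test) == TP / (TP + FP)
    True
    >>> stats.accuracy(cond, test) == (TP + TN) / float(p)
    True
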
def fleiss_kappa(W, k):
    """Computes Fleiss' kappa for a set of variables classified into k
    categories by a number of different raters.

    W is a matrix with shape (variables, raters) whose entries are the
    category labels 0, ..., k - 1.
    """
    N, n = W.shape
    if n <= 1:
        raise ValueError("At least two ratings are needed.")
    A = np.zeros((N, k))
    Nn = N * n
    p = [0.0] * k
    for j in range(k):
        # Number of raters that assigned category j to each variable.
        A[:, j] = np.sum(W == j, axis=1)
        # Proportion of all assignments made to category j.
        p[j] = np.sum(A[:, j]) / float(Nn)

    # Extent of agreement among the raters for each variable.
    P = [0.0] * N
    for i in range(N):
        for j in range(k):
            P[i] += A[i, j] ** 2.0
        P[i] -= n
        P[i] /= float(n * (n - 1))

    P_ = sum(P) / float(N)  # Mean agreement over all variables.
    Pe = sum([pj ** 2.0 for pj in p])  # Agreement expected by chance.

    if abs(Pe - 1) < consts.TOLERANCE:
        kappa = 1.0
    else:
        kappa = (P_ - Pe) / (1.0 - Pe)
    if kappa > 1.0:
        kappa = 1.0

    return kappa

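fleiss_kappa is the only public function above without a worked example, so here is a small hand-checked one (an editor's addition, not part of the original module): five variables rated by three raters into k = 2 categories.

    >>> import numpy as np
    >>> import parsimony.utils.stats as stats
    >>> W = np.array([[0, 0, 0],
    ...               [1, 1, 1],
    ...               [0, 1, 0],
    ...               [1, 1, 0],
    ...               [0, 0, 1]])
    >>> round(stats.fleiss_kappa(W, 2), 2)
    0.2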