Package parsimony :: Package utils :: Module resampling
[hide private]
[frames | no frames]

Source Code for Module parsimony.utils.resampling

  1  # -*- coding: utf-8 -*- 
  2  """ 
  3  Created on Mon Jun 16 10:00:06 2014 
  4   
  5  Copyright (c) 2013-2014, CEA/DSV/I2BM/Neurospin. All rights reserved. 
  6   
  7  @author:  Tommy Löfstedt 
  8  @email:   lofstedt.tommy@gmail.com 
  9  @license: BSD 3-clause. 
 10  """ 
 11  import numpy as np 
 12   
 13  __all__ = ["k_fold", "stratified_k_fold", 
 14             "bootstrap", "stratified_bootstrap"] 
 15   
 16   
def k_fold(n, K=7):
    """K-fold cross validation iterator.

    Yields one ``(train, test)`` pair of index lists per fold. Sample ``i``
    is placed in the test set of fold ``i % K``; the training set of a fold
    is every other index in ``range(n)``.

    Parameters
    ----------
    n : Positive integer greater than one. The number of samples.

    K : Positive integer greater than or equal to two. The number of folds.

    Yields
    ------
    train : List of integers. The training indices, in ascending order.

    test : List of integers. The test indices, in ascending order.
    """
    # ``range`` instead of ``xrange`` so the module runs on Python 2 and 3.
    for k in range(K):
        # Materialise the test indices: on Python 3 a bare ``range`` is a
        # lazy object rather than the list that callers receive on Python 2.
        test = list(range(k, n, K))
        # The complement of range(k, n, K) within range(n), built in
        # ascending order so the result does not depend on set iteration
        # order.
        train = [i for i in range(n) if i % K != k]

        yield train, test
34 35
def stratified_k_fold(y, K=7):
    """Stratified k-fold cross validation iterator.

    Yields one ``(train, test)`` pair of index lists per fold. Within each
    class the samples are dealt to the folds in round-robin order (the j-th
    sample of a class goes to fold ``j % K``), so every fold receives a
    near-equal share of every class.

    Parameters
    ----------
    y : Numpy array with n > 1 elements. The class labels. These labels are
            used to stratify the folds. The array is flattened, so any shape
            is accepted.

    K : Positive integer greater than or equal to two. The number of folds.

    Yields
    ------
    train : List of integers. The training indices, in ascending order.

    test : List of integers. The test indices, in ascending order.

    Raises
    ------
    ValueError
        If y contains no elements.
    """
    # Work on a flat 1-D view: np.unique(..., return_inverse=True) returns an
    # inverse shaped like its input on NumPy >= 2.0, and the code below
    # needs a 1-D inverse.
    y = np.asarray(y).ravel()
    n = y.size
    if n == 0:
        raise ValueError("y must contain at least one class label.")

    # y_inverse[i] is the index (into labels) of the class of sample i.
    labels, y_inverse = np.unique(y, return_inverse=True)

    # Assign each sample to a fold, class by class.
    fold_of = np.empty(n, dtype=int)
    for c in range(len(labels)):
        members = y_inverse == c
        fold_of[members] = np.arange(np.count_nonzero(members)) % K

    # Loop over the folds and yield the corresponding train and test sets.
    for k in range(K):
        in_test = fold_of == k
        train = np.where(~in_test)[0].tolist()
        test = np.where(in_test)[0].tolist()

        yield train, test
76 77
def bootstrap(n, B=100, seed=None):
    """Bootstrap sample iterator.

    Yields B index lists, each holding n indices drawn uniformly with
    replacement from ``range(n)``.

    Parameters
    ----------
    n : Positive integer greater than one. The number of samples.

    B : Positive integer greater than or equal to two. The number of bootstrap
            samples to draw.

    seed : Integer. A random seed to initialise the random number generator
            with. Use in order to obtain deterministic results. The seed is not
            used if the seed is None.

    Yields
    ------
    sample : List of n integers. The indices of one bootstrap training set.
    """
    # NOTE: this seeds NumPy's global random state. Because this is a
    # generator, the seeding happens when the first sample is requested, not
    # when bootstrap() is called.
    if seed is not None:
        np.random.seed(seed)

    # ``range`` instead of ``xrange`` so the module runs on Python 2 and 3.
    for _ in range(B):
        yield np.random.randint(0, n, size=n).tolist()
101 102
def stratified_bootstrap(y, B=100, seed=None):
    """Stratified bootstrap sample iterator.

    Yields B index lists of length n. Resampling is done within each class:
    the positions occupied by a class in y are filled with indices drawn
    uniformly with replacement from that same class, so every sample keeps
    the class proportions (and the class layout) of y.

    Parameters
    ----------
    y : Numpy array with n > 1 elements. The class labels. These labels are
            used to stratify the samples. The array is flattened, so any shape
            is accepted.

    B : Positive integer greater than or equal to two. The number of bootstrap
            samples to draw.

    seed : Integer. A random seed to initialise the random number generator
            with. Use in order to obtain deterministic results. The seed is not
            used if the seed is None.

    Yields
    ------
    sample : List of n integers. The indices of one bootstrap training set.
    """
    # Work on a flat 1-D view: np.unique(..., return_inverse=True) returns an
    # inverse shaped like its input on NumPy >= 2.0, and the code below
    # needs a 1-D inverse.
    y = np.asarray(y).ravel()
    n = y.size

    # NOTE: this seeds NumPy's global random state. Because this is a
    # generator, the seeding happens when the first sample is requested.
    if seed is not None:
        np.random.seed(seed)

    # y_inverse[i] is the index (into labels) of the class of sample i.
    labels, y_inverse = np.unique(y, return_inverse=True)

    # The positions of each class do not change between bootstrap draws, so
    # compute them once instead of once per draw.
    class_members = [np.where(y_inverse == c)[0] for c in range(len(labels))]

    for _ in range(B):
        # The builtin ``int`` replaces the ``np.int`` alias, which was removed
        # from NumPy. Every position belongs to exactly one class, so all of
        # the -1 placeholders are overwritten below.
        sample = -np.ones(n, dtype=int)
        for members in class_members:
            c = members.shape[0]
            # Draw c members of this class, with replacement. Classes are
            # visited in sorted-label order, one randint call per class, so
            # seeded results match the original implementation.
            picks = np.random.randint(0, c, size=c)
            sample[members] = members[picks]

        yield sample.tolist()
143