"""
Created on Mon Jun 16 10:00:06 2014

Copyright (c) 2013-2014, CEA/DSV/I2BM/Neurospin. All rights reserved.

@author:  Tommy Löfstedt
@email:   lofstedt.tommy@gmail.com
@license: BSD 3-clause.
"""
import numpy as np
12
# Public API of this module: two cross-validation splitters and two
# bootstrap samplers, each implemented as a generator function.
__all__ = ["k_fold", "stratified_k_fold",
           "bootstrap", "stratified_bootstrap"]
15
16
def k_fold(n, K=7):
    """K-fold cross validation iterator.

    Yields indices for training and test sets. Fold ``k`` uses the
    interleaved indices ``k, k + K, k + 2K, ...`` as its test set and all
    remaining indices as its training set.

    NOTE(review): the ``def`` line was missing from the reviewed text; the
    signature was reconstructed from the docstring and ``__all__``, and the
    default ``K=7`` is presumed -- confirm against callers.

    Parameters
    ----------
    n : Positive integer greater than one. The number of samples.

    K : Positive integer greater than or equal to two. The number of folds.

    Yields
    ------
    (train, test) : Tuple of two lists of integers. The sample indices of
            the training set and of the test set, both in ascending order.
    """
    for k in range(K):
        # Materialise the test indices so that callers get a real list on
        # Python 3 as well (a bare range object is not a list there).
        test = list(range(k, n, K))
        # Everything that is not in the k-th residue class modulo K.
        train = [i for i in range(n) if i % K != k]

        yield train, test
34
35
def stratified_k_fold(y, K=7):
    """Stratified k-fold cross validation iterator.

    Yields indices for training and test sets. Within every class the
    samples are dealt out to the folds in round-robin order (first member
    to fold 0, second to fold 1, and so on), so each fold receives roughly
    the same share of every class.

    NOTE(review): the ``def`` line was missing from the reviewed text; the
    signature was reconstructed from the docstring and ``__all__``, and the
    default ``K=7`` is presumed -- confirm against callers.

    Parameters
    ----------
    y : Numpy array with n > 1 elements. The class labels. These labels are
            used to stratify the folds. Arrays of any shape are accepted;
            they are flattened before use.

    K : Positive integer greater than or equal to two. The number of folds.

    Yields
    ------
    (train, test) : Tuple of two lists of integers. The sample indices of
            the training set and of the test set, both in ascending order.
    """
    # Work on a flat view: np.unique returns an inverse shaped like its
    # input on NumPy 2.x, and the boolean masks below must be 1-D.
    y = np.asarray(y).ravel()
    n = y.shape[0]

    # Assign the members of each class to the folds 0, 1, ..., K - 1, 0, ...
    labels, y_inverse = np.unique(y, return_inverse=True)
    fold_of = np.empty(n, dtype=int)
    for i in range(len(labels)):
        members = np.flatnonzero(y_inverse == i)
        fold_of[members] = np.arange(members.size) % K

    # Every sample now has a fold in [0, K - 1]; no leftovers can remain.
    for k in range(K):
        in_test = fold_of == k
        test = np.flatnonzero(in_test).tolist()
        train = np.flatnonzero(~in_test).tolist()

        yield train, test
76
77
def bootstrap(n, B=100, seed=None):
    """Bootstrap sample iterator.

    Yields indices for a bootstrap training set: n indices drawn uniformly
    with replacement from 0, ..., n - 1.

    NOTE(review): the ``def`` line was missing from the reviewed text; the
    signature was reconstructed from the docstring and ``__all__``, and the
    default ``B=100`` is presumed -- confirm against callers.

    Parameters
    ----------
    n : Positive integer greater than one. The number of samples.

    B : Positive integer greater than or equal to two. The number of bootstrap
            samples to draw.

    seed : Integer. A random seed to initialise the random number generator
            with. Use in order to obtain deterministic results. The seed is not
            used if the seed is None.

    Yields
    ------
    sample : List of n integers. The sample indices of one bootstrap sample.
    """
    # This seeds NumPy's global random state. Since this is a generator, the
    # seeding takes place when the first sample is requested.
    if seed is not None:
        np.random.seed(seed)

    for _ in range(B):
        yield np.random.randint(0, n, size=n).tolist()
101
102
def stratified_bootstrap(y, B=100, seed=None):
    """Stratified bootstrap sample iterator.

    Yields indices for a bootstrap training set. Resampling is done within
    each class: the positions held by a class in ``y`` are filled with
    indices drawn, with replacement, from the members of that same class.
    Every sample therefore has exactly the class proportions of ``y``.

    NOTE(review): the ``def`` line was missing from the reviewed text; the
    signature was reconstructed from the docstring and ``__all__``, and the
    default ``B=100`` is presumed -- confirm against callers.

    Parameters
    ----------
    y : Numpy array with n > 1 elements. The class labels. These labels are
            used to stratify the samples. Arrays of any shape are accepted;
            they are flattened before use.

    B : Positive integer greater than or equal to two. The number of bootstrap
            samples to draw.

    seed : Integer. A random seed to initialise the random number generator
            with. Use in order to obtain deterministic results. The seed is not
            used if the seed is None.

    Yields
    ------
    sample : List of n integers. The sample indices of one bootstrap sample.
    """
    # Work on a flat view: np.unique returns an inverse shaped like its
    # input on NumPy 2.x, and the boolean masks below must be 1-D.
    y = np.asarray(y).ravel()
    n = y.shape[0]

    # This seeds NumPy's global random state. Since this is a generator, the
    # seeding takes place when the first sample is requested.
    if seed is not None:
        np.random.seed(seed)

    # The member indices of each class do not change between samples, so
    # they are computed once, up front.
    labels, y_inverse = np.unique(y, return_inverse=True)
    members = [np.flatnonzero(y_inverse == i) for i in range(len(labels))]

    for _ in range(B):
        sample = np.empty(n, dtype=int)
        # One randint call per class, in label order, keeps the sequence of
        # random draws the same for a given seed.
        for idx in members:
            draws = np.random.randint(0, idx.size, size=idx.size)
            sample[idx] = idx[draws]

        yield sample.tolist()
143