1
2 """
3 Created on Tue Mar 18 10:57:39 2014
4
5 Copyright (c) 2013-2014, CEA/DSV/I2BM/Neurospin. All rights reserved.
6
7 @author: Edouard Duchesnay
8 @email: edouard.duchesnay@cea.fr
9 @license: BSD 3-clause.
10 """
11 import numpy as np
12 from scipy import ndimage
13 from ..utils import Dot, ObjImage, spatial_smoothing, corr_to_coef
14
15
def load(n_samples=100, shape=(30, 30, 1),
         r2=.75,
         sigma_spatial_smoothing=1,
         obj_pix_ratio=2.,
         model="independant",
         random_seed=None):
    """Generate regression samples (images + target variable) and beta.

    The covariance structure is controlled both at a pixel level (spatial
    smoothing) and at an object level. Objects are groups of pixels sharing
    a covariance that stems from a latent variable. beta is non null within
    objects (default is five dots). Then y is obtained with
    y = X * beta + noise, where beta is scaled such that
    r_square(y, X * beta) = r2 and noise is sampled according to N(0, 1).

    Parameters
    ----------
    n_samples: Integer. Number of samples. Default is 100.

    shape: Tuple or list of sample shape. Default is (30, 30, 1).

    r2: Float. The desired R-squared (explained variance) ie.:
        r_square(y, X * beta) = r2 (Default is .75)

    sigma_spatial_smoothing: Float. Standard deviation for Gaussian kernel
        (default is 1). High value promotes spatial correlation between
        pixels.

    model: string or a dict (default "independant")
        If model is "independant":
            # Each dot has an independent latent
            l1=1., l2=1., l3=1., l4=1., l5=1.,
            # No shared variance
            l12=0., l45=0., l12345=0.,
            # Five dots contribute equally
            b1=1., b2=1., b3=1., b4=-1., b5=-1.

        If model is a dictionary:
            update (overwrite) the "independant" model with the dictionary's
            parameters. Example, set betas of dots 4 and 5 to 1:
            dict(b4=1., b5=1.)

        If model is "redundant":
            # Point-level signal in dots 1 and 2 stems from a shared latent
            l1=0., l2=0., l12=1.,
            # l3 is independent
            l3=1.,
            # Point-level signal in dots 4 and 5 stems from a shared latent
            l4=0., l5=0., l45=1.,
            # No global shared variance
            l12345=0.,
            # Five dots contribute equally
            b1=1., b2=1., b3=1., b4=-1., b5=-1.

        If model is "suppressor":
            # Point-level signal in dot 2 stems only from a shared latent
            l1=1, l2=0., l12=1.,
            # l3 is independent
            l3=1.,
            # Point-level signal in dot 5 stems only from a shared latent
            l4=1., l5=0., l45=1.,
            # No global shared variance
            l12345=0.,
            # Dot 2 suppresses shared signal with dot 1, dot 5 with dot 4
            b1=1., b2=-1., b3=1., b4=1., b5=-1.

            y = X1 - X2 + X3 + X4 - X5 + noise
            y = l1 + l12 - l12 + l3 + l4 + l45 - l45 + noise
            y = l1 + l3 + l4 + noise
            So pixels of X2 and X5 are not correlated with the target y so
            they will not be detected by univariate analysis. However, they
            are useful since they suppress unwanted variance that stems
            from latents l12 and l45.

    obj_pix_ratio: Float. Controls the ratio between object-level signal
        and pixel-level signal for pixels within objects. If
        obj_pix_ratio == 1 then 100% of the signal of pixels within
        the same object is shared (ie.: no pixel level) signal. If
        obj_pix_ratio == 0 then all the signal is pixel specific.
        High obj_pix_ratio promotes spatial correlation between
        pixels of the same object.

    random_seed: None or integer. See numpy.random.seed(). If not None, it
        can be used to obtain reproducible samples. The caller's global
        RNG state is restored on return.

    Returns
    -------
    X3d: Numpy array of shape [n_sample, shape]. The input features.

    y: Numpy array of shape [n_sample, 1]. The target variable.

    beta3d: Numpy array of shape [shape,]. It is the beta such that
        y = X * beta + noise.

    Details
    -------
    The general procedure is:

    1) For each pixel i, generate independent variables Xi ~ N(0, 1).

    2) Add object-level structure corresponding to the five dots:
        - Sample eight latent variables ~ N(0, 1): l1, l2, l3, l4, l5,
          l12, l45, l12345.
          l1: latent (shared variance) for all pixels of dot 1.
          ...
          l5: latent (shared variance) for all pixels of dot 5.
          l12: latent (shared variance) for all pixels of dots 1 and 2.
          l45: latent (shared variance) for all pixels of dots 4 and 5.
          l12345: latent (shared variance) for all pixels of dots 1, 2, 3,
          4 and 5.

        - Pixel i of dots X1, X2, X3, X4, X5 is sampled as:
          X1i = l1 + l12 + l12345 + Xi
          X2i = l2 + l12 + l12345 + Xi
          X3i = l3 + l12345 + Xi
          X4i = l4 + l45 + l12345 + Xi
          X5i = l5 + l45 + l12345 + Xi
          Note that:
          Pixels of dot X1 share a common variance that stems from l1, l12
          and l12345; pixels of dot X2 from l2, l12 and l12345; pixels of
          dots X1 and X2 share a common variance that stems from l12; etc.

    3) Spatial smoothing.

    4) Model: y = X beta + noise
        - Betas are null outside dots, and b1, b2, b3, b4, b5 within dots
        - Sample noise ~ N(0, 1)
        - Compute X beta then scale beta such that: r_squared(y, X beta) = r2
        Return X, y, beta

    Examples
    --------
    >>> import numpy as np
    >>> import matplotlib.pyplot as plot
    >>> from parsimony import datasets
    >>> n_samples = 100
    >>> shape = (11, 11, 1)
    >>> X3d, y, beta3d = datasets.regression.dice5.load(n_samples=n_samples,
    ...     shape=shape, r2=.5, random_seed=1)
    """
    signal_std_pix = 1.
    # Object-level (latent) std relative to the pixel-level std.
    signal_std_obj = float(obj_pix_ratio) / signal_std_pix
    mu_e = 0
    if shape[0] < 5 or shape[1] < 5:
        raise ValueError("Shape too small. The minimum is (5, 5, 0)")

    if len(shape) == 2:  # promote 2D shapes to 3D with a singleton z axis
        shape = tuple(list(shape) + [1])

    n_features = np.prod(shape)
    nx, ny, nz = shape

    # Preserve the caller's global RNG state when a seed is requested.
    if random_seed is not None:
        rnd_state = np.random.get_state()
        np.random.seed(random_seed)

    # 1) Independent pixel-level signal: Xi ~ N(mu_e, signal_std_pix).
    X = np.random.normal(mu_e, signal_std_pix, n_samples * n_features)
    X3d = X.reshape(n_samples, nx, ny, nz)

    # 2) Latent std / beta parameters. Default is the "independant" model.
    model_ = dict(
        # Each dot has an independent latent
        l1=1., l2=1., l3=1., l4=1., l5=1.,
        # No shared variance
        l12=0., l45=0., l12345=0.,
        # Five dots contribute equally
        b1=1., b2=1., b3=1., b4=-1., b5=-1.)
    if isinstance(model, dict):
        model_.update(model)
    # NOTE: the original used 'model is "redundant"' — identity comparison of
    # strings is implementation-dependent; use equality instead.
    elif model == "redundant":
        model_ = dict(
            # Point-level signal in dots 1 and 2 stems from a shared latent
            l1=0., l2=0., l12=1.,
            # l3 is independent
            l3=1.,
            # Point-level signal in dots 4 and 5 stems from a shared latent
            l4=0., l5=0., l45=1.,
            # No global shared variance
            l12345=0.,
            # Five dots contribute equally
            b1=1., b2=1., b3=1., b4=-1., b5=-1.)
    elif model == "suppressor":
        model_ = dict(
            # Point-level signal in dot 2 stems only from the shared latent
            l1=1, l2=0., l12=1.,
            # l3 is independent
            l3=1.,
            # Point-level signal in dot 5 stems only from the shared latent
            l4=1., l5=0., l45=1.,
            # No global shared variance
            l12345=0.,
            # Dot 2 suppresses shared signal with dot 1, dot 5 with dot 4
            b1=1., b2=-1., b3=1., b4=1., b5=-1.)
    # Scale every latent std by the object/pixel signal ratio.
    for key in ("l1", "l2", "l3", "l4", "l5", "l12", "l45", "l12345"):
        model_[key] *= signal_std_obj

    # Build the geometric objects and attach each one's latent std and beta.
    objects = dice_five_with_union_of_pairs(shape)
    d1, d2, d3, d4, d5, union12, union45, union12345 = objects
    d1.std, d1.beta = model_["l1"], model_["b1"]
    d2.std, d2.beta = model_["l2"], model_["b2"]
    d3.std, d3.beta = model_["l3"], model_["b3"]
    d4.std, d4.beta = model_["l4"], model_["b4"]
    d5.std, d5.beta = model_["l5"], model_["b5"]
    # Unions carry shared variance only; they do not contribute to beta.
    union12.std, union12.beta = model_["l12"], 0.
    union45.std, union45.beta = model_["l45"], 0.
    union12345.std, union12345.beta = model_["l12345"], 0.

    # Add the object-level (latent) signal on top of the pixel-level noise.
    X3d, support = ObjImage.object_model(objects, X3d)

    # 3) Spatial smoothing, then re-standardize each feature in place
    # (the reshape is a view, so X3d is centered/scaled too).
    if sigma_spatial_smoothing != 0:
        X3d = spatial_smoothing(X3d, sigma_spatial_smoothing, mu_e,
                                obj_pix_ratio)
        X = X3d.reshape((X3d.shape[0], np.prod(X3d.shape[1:])))
        X -= X.mean(axis=0)
        X /= X.std(axis=0)

    # 4) Build beta: null outside objects, o.beta inside, then smoothed
    # with the same kernel as the images.
    beta3d = np.zeros(X3d.shape[1:])
    for o in objects:  # was: for k in xrange(len(objects)) — Python 2 only
        beta3d[o.get_mask()] += o.beta
    beta3d = ndimage.gaussian_filter(beta3d, sigma=sigma_spatial_smoothing)
    # ravel() is a view here, so scaling beta below also scales beta3d.
    beta = beta3d.ravel()

    # 5) y = X beta + noise, beta scaled so that r_square(y, X beta) = r2.
    X = X3d.reshape(n_samples, np.prod(shape))
    Xbeta = np.dot(X, beta)

    if r2 < 1:
        noise = np.random.normal(0, 1, Xbeta.shape[0])
        coef = corr_to_coef(v_x=np.var(Xbeta), v_e=np.var(noise),
                            cov_xe=np.cov(Xbeta, noise)[0, 1],
                            cor=np.sqrt(r2))
        beta *= coef
        y = np.dot(X, beta) + noise
    else:  # r2 >= 1: noise-free target
        y = np.dot(X, beta)

    # Restore the caller's RNG state.
    if random_seed is not None:
        np.random.set_state(rnd_state)

    return X3d, y.reshape((n_samples, 1)), beta3d
294
295
296
297
299 """Seven objects, five dot + union1 = dots 1 + 2 and union2 = dots 4 + 5
300
301 Examples
302 --------
303 shape = (5, 5, 1)
304 map = np.zeros(shape)
305 info = np.zeros(shape)
306 for o in dice_five_with_union_of_pairs(shape):
307 map[o.get_mask()] += 1.
308 import matplotlib.pyplot as plt
309 plot.matshow(map.squeeze())
310 plt.show()
311 """
312 nx, ny, nz = shape
313 if nx < 5 or ny < 5:
314 raise ValueError("Shape too small minimun is (5, 5, 0)")
315 s_obj = np.max([1, np.floor(np.max(shape) / 7)])
316 k = 1
317 c1 = np.floor((k * nx / 4., ny / 4., nz / 2.))
318 d1 = Dot(center=c1, size=s_obj, shape=shape)
319 c2 = np.floor((k * nx / 4., ny - (ny / 4.), nz / 2.))
320 d2 = Dot(center=c2, size=s_obj, shape=shape)
321 union12 = ObjImage(mask=d1.get_mask() + d2.get_mask())
322 k = 3
323 c4 = np.floor((k * nx / 4., ny / 4., nz / 2.))
324 d4 = Dot(center=c4, size=s_obj, shape=shape)
325 c5 = np.floor((k * nx / 4., ny - (ny / 4.), nz / 2.))
326 d5 = Dot(center=c5, size=s_obj, shape=shape)
327 union45 = ObjImage(mask=d4.get_mask() + d5.get_mask())
328
329 c3 = np.floor((nx / 2., ny / 2., nz / 2.))
330 d3 = Dot(center=c3, size=s_obj, shape=shape)
331 union12345 = ObjImage(mask=d1.get_mask() + d2.get_mask() + d3.get_mask() +
332 d4.get_mask() + d5.get_mask())
333 return [d1, d2, d3, d4, d5, union12, union45, union12345]
334