Package parsimony :: Package datasets :: Package simulate :: Module simulate
[hide private]
[frames] | no frames]

Source Code for Module parsimony.datasets.simulate.simulate

  1  # -*- coding: utf-8 -*- 
  2  """ 
  3  Created on Mon May 12 15:46:47 2014 
  4   
  5  Copyright (c) 2013-2014, CEA/DSV/I2BM/Neurospin. All rights reserved. 
  6   
  7  @author:  Tommy Löfstedt 
  8  @email:   tommy.loefstedt@cea.fr 
  9  @license: BSD 3-clause. 
 10  """ 
 11  import abc 
 12   
 13  import numpy as np 
 14   
 15  import utils 
 16   
 17  __all__ = ["SimulatedData", "LinearRegressionData"] 
18 19 20 -class SimulatedData(object):
21 22 __metaclass__ = abc.ABCMeta 23 24 @abc.abstractmethod
25 - def load(self, beta):
26 raise NotImplementedError('Abstract method "load" must be ' 27 'specialised!')
28
29 30 -class LinearRegressionData(SimulatedData):
31 """Returns data for linear regression that is generated such that we know 32 the exact solution. 33 34 The data generated by this function is fit to the Linear regression + L1 + 35 L2 + Smoothed total variation function, i.e.: 36 37 f(b) = (1 / 2).|Xb - y|²_2 + sum_{i=1}^N l_i.P_i(b), 38 39 where the P_i(.) are penalty functions. 40 41 Parameters 42 ---------- 43 penalties : List or tuple. The penalties to add to the linear regression 44 function. 45 46 X0 : Numpy array (n-by-p). The initial matrix to use when building data. 47 This matrix carries the desired correlation structure of the 48 generated data. The generated data will be a column-scaled version 49 of this matrix. 50 51 e : Numpy array (n-by-1). The error vector e = Xb - y. This vector carries 52 the desired distribution of the residual. 53 54 snr : Positive float. Signal-to-noise ratio between model and residual. 55 56 intercept : Boolean. Whether or not to include an intercept variable. This 57 variable is not penalised. Note that if intercept is True, then e 58 will be centred. 59 60 Returns 61 ------- 62 X : Numpy array (n-by-p). The generated X matrix. 63 64 y : Numpy array (n-by-1). The generated y vector. 65 66 beta : Numpy array (p-by-1). The regression vector with the correct snr (if 67 snr is given). 68 """
69 - def __init__(self, penalties, X0, e, snr=None, intercept=False):
70 71 super(LinearRegressionData, self).__init__() 72 73 self.penalties = list(penalties) 74 self.X0 = X0 75 self.intercept = bool(intercept) 76 self.e = e - np.mean(e) if intercept else e 77 self.snr = float(snr) if snr is not None else None
78
79 - def load(self, beta):
80 """Generate the simulated data. 81 82 Parameters 83 ---------- 84 beta : Numpy array (p-by-1). The regression vector to generate data 85 from. 86 """ 87 if self.snr != None: 88 old_snr = self.snr 89 self.snr = None 90 try: 91 def f(x): 92 X, y, _ = self.load(x * beta) 93 94 print "snr = %.5f = %.5f = |X.b| / |e| = %.5f / %.5f" \ 95 % (old_snr, np.linalg.norm(np.dot(X, x * beta)) \ 96 / np.linalg.norm(self.e), 97 np.linalg.norm(np.dot(X, x * beta)), 98 np.linalg.norm(self.e)) 99 100 return (np.linalg.norm(np.dot(X, x * beta)) \ 101 / np.linalg.norm(self.e)) - old_snr
102 103 snr = utils.bisection_method(f, 104 low=0.0, 105 high=np.sqrt(old_snr), 106 maxiter=30) 107 108 finally: 109 self.snr = old_snr 110 111 beta = beta * snr 112 113 grad = 0.0 114 for p in self.penalties: 115 grad -= p.grad(beta[1:, :] if self.intercept else beta) 116 117 Mte = np.dot(self.X0.T, self.e) 118 if self.intercept: 119 Mte = Mte[1:, :] 120 121 alpha = np.divide(grad, Mte) 122 123 p = beta.shape[0] 124 125 start = 1 if self.intercept else 0 126 X = np.ones(self.X0.shape) 127 for i in xrange(start, p): 128 X[:, i] = self.X0[:, i] * alpha[i - start, 0] 129 130 y = np.dot(X, beta) - self.e 131 132 return X, y, beta
133 134 if __name__ == "__main__": 135 import doctest 136 doctest.testmod() 137