parsimony.datasets.simulate.correlation

1 # -*- coding: utf-8 -*- 2 """ 3 Generates correlation matrices using two of the approaches described in: 4 5 Hardin & Garcia (2013). A method for generating realistic correlation 6 matrices. 7 8 Created on Wed Jun 19 13:56:24 2013 9 10 Copyright (c) 2013-2014, CEA/DSV/I2BM/Neurospin. All rights reserved. 11 12 @author: Tommy Löfstedt 13 @email: tommy.loefstedt@cea.fr 14 @license: BSD 3-clause. 15 """ 16 import numpy as np 17 18 __all__ = ['ConstantCorrelation', 'ToeplitzCorrelation'] 19 20

21 -def ConstantCorrelation(p=[100], rho=[0.05], delta=0.10, eps=0.5):

22 """ Returns a positive definite matrix, S, corresponding to a block 23 covariance matrix. Each block has the structure: 24 25 [1, ..., rho_k] 26 S_k = [..., 1, ...], 27 [rho_k, ..., 1] 28 29 i.e. 1 on the diagonal and rho_k (on average) outside the diagonal. S then 30 has the structure: 31 32 [S_1, delta, delta] 33 S = [delta, S_i, delta], 34 [delta, delta, S_N] 35 36 i.e. with the groups-correlation matrices on the diagonal and delta (on 37 average) outside. 38 39 Parameters 40 ---------- 41 p : A scalar or a list of scalars with the numbers of variables for each 42 group. 43 44 rho : A scalar or a list of the average correlation between off-diagonal 45 elements of S. 46 47 delta: Baseline noise between groups. Only used if the number of groups is 48 greater than one. The baseline noise is computed as 49 50 delta * rho_min, 51 52 and you must provide a delta such that 0 <= delta < 1. 53 54 eps : Entry-wise random noise. This parameter determines the distribution 55 of the noise. The noise is approximately normally distributed with 56 mean 57 58 delta * min(rho) 59 60 and variance 61 62 (eps * (1 - max(rho))) ** 2.0 / 10. 63 64 You can thus control the noise by this parameter, but note that you 65 must have 66 67 0 <= eps < 1 - max(rho). 68 69 Returns 70 ------- 71 S : The correlation matrix. 72 """ 73 if not isinstance(rho, (list, tuple)): 74 p = [p] 75 rho = [rho] 76 77 K = len(rho) 78 79 M = 10 # Dim. of noise space. uu approx ~N(0, 1 / M) 80 N = 0 81 rho_min = min(rho) 82 rho_max = max(rho) 83 eps = eps * (1.0 - rho_max) 84 delta = delta * rho_min 85 86 for k in xrange(K): 87 N += p[k] 88 89 u = np.random.randn(M, N) 90 # u = (np.random.rand(M, N) * 2.0) - 1.0 91 u *= 1.0 / np.sqrt(np.sum(u ** 2.0, axis=0)) # Normalise. 92 uu = np.dot(u.T, u) # ~N(0, 1 / M) 93 uu[uu > 1.0] = 1.0 94 uu[uu < -1.0] = -1.0 95 96 S = delta + eps * uu 97 98 Nk = 0 99 for k in xrange(K): 100 pk = p[k] 101 Nk += pk 102 103 uuk = uu[Nk - pk:Nk, Nk - pk:Nk] 104 Sk = rho[k] + eps * uuk 105 Sk -= np.diag(np.diag(Sk)) - np.eye(*Sk.shape) 106 107 S[Nk - pk:Nk, Nk - pk:Nk] = Sk 108 109 # k = (N * (1.0 + eps) + 1) / (1.0 - rho_max - eps) 110 # print "cond(S) = ", np.linalg.cond(S) 111 # print "cond(S) <= ", k 112 113 return S

114 115

116 -def ToeplitzCorrelation(p=[100], rho=[0.05], eps=0.5):

117 """ Returns a positive definite matrix, S, corresponding to a block 118 covariance matrix. Each block has the structure: 119 120 [ 1, rho_k^1, rho_k^2, ..., rho_k^{p_k-1}] 121 [ rho_k^1, 1, rho_k^1, ..., rho_k^{p_k-2}] 122 S_k = [ rho_k^2, rho_k^1, 1, ..., ...] 123 [ ..., ..., ..., 1, rho_k^1] 124 [rho_k^{p_k-1}, rho_k^{p_k-2}, ..., rho_k^1, 1] 125 126 i.e. 1 on the diagonal and exponentially decreasing correlations outside 127 the diagonal. S then has the structure: 128 129 [S_1, 0, 0] 130 S = [ 0, S_i, 0], 131 [ 0, 0, S_N] 132 133 i.e. with the group-correlation matrices on the diagonal and zero (on 134 average) outside. 135 136 Parameters 137 ---------- 138 p : A scalar or a list of scalars with the numbers of variables for each 139 group. 140 141 rho : A scalar or a list of the average correlation between off-diagonal 142 elements of S. 143 144 eps : Maximum entry-wise random noise. This parameter determines the 145 distribution of the noise. The noise is approximately normally 146 distributed with zero mean and variance 147 148 (eps * (1.0 - max(rho)) / (1.0 + max(rho))) ** 2.0 / 10. 149 150 You can thus control the noise by this parameter, but note that you 151 must have 152 153 0 <= eps < 1. 154 155 Returns 156 ------- 157 S : The correlation matrix. 158 """ 159 if not isinstance(rho, (list, tuple)): 160 p = [p] 161 rho = [rho] 162 163 K = len(rho) 164 165 M = 10 # Dim. of noise space. uu approx ~N(0, 1 / M) 166 N = sum(p) 167 rho_max = max(rho) 168 eps = eps * (1.0 - rho_max) / (1.0 + rho_max) 169 170 u = np.random.randn(M, N) 171 # u = (np.random.rand(M, N) * 2.0) - 1.0 172 u *= 1.0 / np.sqrt(np.sum(u ** 2.0, axis=0)) # Normalise. 173 uu = np.dot(u.T, u) # ~N(0, 1 / M) 174 175 S = np.zeros((N, N)) 176 Nk = 0 177 for k in xrange(K): 178 pk = p[k] 179 Nk += pk 180 rhok = rho[k] 181 v = [0] * (pk - 1) 182 for i in xrange(0, pk - 1): 183 v[i] = rhok ** (i + 1) 184 185 Sk = np.eye(pk, pk) 186 Sk[0, 1:] = v 187 Sk[1:, 0] = v 188 for i in xrange(1, pk - 1): 189 Sk[i, i + 1:] = v[:-i] 190 Sk[i + 1:, i] = v[:-i] 191 192 S[Nk - pk:Nk, Nk - pk:Nk] = Sk 193 194 S += eps * (uu - np.eye(*uu.shape)) 195 196 # k = (((1.0 + rho_max) / (1.0 - rho_max)) + (N - 1.0) * eps) \ 197 # / (((1.0 - rho_max) / (1.0 + rho_max)) - eps) 198 # print "cond(S) = %.5f <= %.5f" % (np.linalg.cond(S), k) 199 200 return S

201 202 if __name__ == "__main__": 203 import doctest 204 doctest.testmod() 205

Source Code for Module parsimony.datasets.simulate.correlation_matrices