parsimony.functions.nesterov.gl

29 """Group L1-L2 function, with overlapping groups. Represents the 30 function 31 32 GL(x) = l * (sum_{g=1}^G ||x_g||_2 - c), 33 34 where ||.||_2 is the L2-norm. The coinstrained version has the form 35 36 GL(x) <= c. 37 38 Attributes 39 ---------- 40 l : Non-negative float. The Lagrange multiplier, or regularisation 41 constant, of the function. 42 43 c : Float. The limit of the constraint. The function is feasible if 44 GL(beta) <= c. The default value is c=0, i.e. the default is a 45 regularised formulation. 46 47 mu : Float. The Nesterov function regularisation constant for the 48 smoothing. 49 50 penalty_start : Non-negative integer. The number of columns, variables 51 etc., to except from penalisation. Equivalently, the first index 52 to be penalised. Default is 0, all columns are included. 53 """

54 - def __init__(self, l, c=0.0, A=None, mu=0.0, penalty_start=0):

55 """ 56 Parameters 57 ---------- 58 l : Non-negative float. The Lagrange multiplier, or regularisation 59 constant, of the function. 60 61 c : Float. The limit of the constraint. The function is feasible if 62 GL(beta) <= c. The default value is c=0, i.e. the default is 63 a regularised formulation. 64 65 A : Numpy array. A (usually sparse) matrix. The linear operator for 66 the Nesterov formulation. May not be None! 67 68 mu : Float. The Nesterov function regularisation constant for the 69 smoothing. 70 71 penalty_start : Non-negative integer. The number of columns, variables 72 etc., to exempt from penalisation. Equivalently, the first 73 index to be penalised. Default is 0, all columns are included. 74 """ 75 super(GroupLassoOverlap, self).__init__(l, A=A, mu=mu, 76 penalty_start=penalty_start) 77 78 self.c = float(c) 79 80 self.reset()

81

82 - def reset(self):

83 84 self._lambda_max = None

85

86 - def f(self, beta):

87 """ Function value. 88 """ 89 if self.l < consts.TOLERANCE: 90 return 0.0 91 92 if self.penalty_start > 0: 93 beta_ = beta[self.penalty_start:, :] 94 else: 95 beta_ = beta 96 97 A = self.A() 98 normsum = 0.0 99 for Ag in A: 100 normsum += maths.norm(Ag.dot(beta_)) 101 102 return self.l * (normsum - self.c)

103

104 - def phi(self, alpha, beta):

105 """Function value with known alpha. 106 107 From the interface "NesterovFunction". 108 """ 109 if self.l < consts.TOLERANCE: 110 return 0.0 111 112 if self.penalty_start > 0: 113 beta_ = beta[self.penalty_start:, :] 114 else: 115 beta_ = beta 116 117 Aa = self.Aa(alpha) 118 119 alpha_sqsum = 0.0 120 for a in alpha: 121 alpha_sqsum += np.sum(a ** 2.0) 122 123 mu = self.get_mu() 124 125 return self.l * ((np.dot(beta_.T, Aa)[0, 0] 126 - (mu / 2.0) * alpha_sqsum) - self.c)

127

128 - def feasible(self, beta):

129 """Feasibility of the constraint. 130 131 From the interface "Constraint". 132 """ 133 if self.penalty_start > 0: 134 beta_ = beta[self.penalty_start:, :] 135 else: 136 beta_ = beta 137 138 A = self.A() 139 normsum = 0.0 140 for Ag in A: 141 normsum += maths.norm(Ag.dot(beta_)) 142 143 return normsum <= self.c

144

145 - def L(self):

146 """ Lipschitz constant of the gradient. 147 148 From the interface "LipschitzContinuousGradient". 149 """ 150 if self.l < consts.TOLERANCE: 151 return 0.0 152 153 lmaxA = self.lambda_max() 154 155 return self.l * lmaxA / self.mu

156

157 - def lambda_max(self):

158 """ Largest eigenvalue of the corresponding covariance matrix. 159 160 From the interface "Eigenvalues". 161 """ 162 # Note that we can save the state here since lambda_max(A) is not 163 # allowed to change. 164 if self._lambda_max is None: 165 A = self.A() 166 colsum = 0.0 167 for Ag in A: 168 B = Ag.copy() 169 B.data **= 2.0 170 colsum += B.sum(axis=0) 171 172 self._lambda_max = np.max(colsum) 173 174 return self._lambda_max

175

176 - def project(self, a):

177 """ Projection onto the compact space of the Nesterov function. 178 179 From the interface "NesterovFunction". 180 181 Parameters 182 ---------- 183 alpha : List of numpy arrays (x-by-1). The not-yet-projected dual 184 variable alpha. 185 """ 186 for i in xrange(len(a)): 187 astar = a[i] 188 normas = np.sqrt(np.sum(astar ** 2.0)) 189 if normas > 1.0: 190 astar *= 1.0 / normas 191 a[i] = astar 192 193 return a

194

195 - def M(self):

196 """ The maximum value of the regularisation of the dual variable. We 197 have 198 199 M = max_{alpha in K} 0.5*||alpha||²_2. 200 201 Since each group may have at most L2-norm 1, M may not exceed the 202 number of groups, i.e. the number of groups divided by two is the 203 maximum. 204 205 From the interface "NesterovFunction". 206 """ 207 return float(len(self.A())) / 2.0

208 209 """ Computes a "good" value of mu with respect to the given beta. 210 211 From the interface "NesterovFunction". 212 """

213 - def estimate_mu(self, beta):

214 215 if self.penalty_start > 0: 216 beta_ = beta[self.penalty_start:, :] 217 else: 218 beta_ = beta 219 220 SS = 0.0 221 A = self.A() 222 for i in xrange(len(A)): 223 SS = max(SS, maths.norm(A[i].dot(beta_))) 224 225 return SS

236 """Generates the linear operator for the group lasso Nesterov function 237 from the groups of variables. 238 239 Parameters: 240 ---------- 241 num_variables : Integer. The total number of variables, including the 242 intercept variable(s). 243 244 groups : A list of lists. The outer list represents the groups and the 245 inner lists represent the variables in the groups. E.g. [[1, 2], 246 [2, 3]] contains two groups ([1, 2] and [2, 3]) with variable 1 and 247 2 in the first group and variables 2 and 3 in the second group. 248 249 weights : List. Weights put on the groups. Default is weight 1 for each 250 group. 251 252 penalty_start : Non-negative integer. The number of variables to exempt 253 from penalisation. Equivalently, the first index to be penalised. 254 Default is 0, all variables are included. 255 """ 256 if weights is None: 257 weights = [1.0] * len(groups) 258 259 A = list() 260 for g in xrange(len(groups)): 261 Gi = groups[g] 262 lenGi = len(Gi) 263 Ag = sparse.lil_matrix((lenGi, num_variables - penalty_start)) 264 for i in xrange(lenGi): 265 w = weights[g] 266 Ag[i, Gi[i] - penalty_start] = w 267 268 # Matrix operations are a lot faster when the sparse matrix is csr 269 A.append(Ag.tocsr()) 270 271 return A

Source Code for Module parsimony.functions.nesterov.gl