parsimony.functions.nesterov.tv

1 # -*- coding: utf-8 -*- 2 """ 3 The :mod:`parsimony.functions.nesterov.tv` module contains the loss function 4 and helper functions for Total variation, TV, smoothed using Nesterov's 5 technique. 6 7 Created on Mon Feb 3 10:46:47 2014 8 9 Copyright (c) 2013-2014, CEA/DSV/I2BM/Neurospin. All rights reserved. 10 11 @author: Tommy Löfstedt, Edouard Duchesnay 12 @email: lofstedt.tommy@gmail.com, edouard.duchesnay@cea.fr 13 @license: BSD 3-clause. 14 """ 15 import math 16 17 import scipy.sparse as sparse 18 import numpy as np 19 20 from .. import properties 21 import parsimony.utils.consts as consts 22 import parsimony.utils.maths as maths 23 import parsimony.utils as utils 24 25 __all__ = ["TotalVariation", 26 "linear_operator_from_mask", "A_from_mask", 27 "linear_operator_from_subset_mask", "A_from_subset_mask", 28 "linear_operator_from_shape", "A_from_shape", 29 "linear_operator_from_mesh", "A_from_mesh", 30 "nesterov_linear_operator_from_mesh"]

31 32 33 -class TotalVariation(properties.NesterovFunction, 34 properties.Penalty, 35 properties.Constraint):

36 """The smoothed Total variation (TV) function 37 38 f(beta) = l * (TV(beta) - c), 39 40 where TV(beta) is the smoothed total variation function. The constrained 41 version has the form 42 43 TV(beta) <= c. 44 """

45 - def __init__(self, l, c=0.0, A=None, mu=0.0, penalty_start=0):

46 """ 47 Parameters 48 ---------- 49 l : Non-negative float. The Lagrange multiplier, or regularisation 50 constant, of the function. 51 52 c : Float. The limit of the constraint. The function is feasible if 53 TV(beta) <= c. The default value is c=0, i.e. the default is a 54 regularisation formulation. 55 56 A : Numpy array (usually sparse). The linear operator for the Nesterov 57 formulation. May not be None! 58 59 mu : Non-negative float. The regularisation constant for the smoothing. 60 61 penalty_start : Non-negative integer. The number of columns, variables 62 etc., to exempt from penalisation. Equivalently, the first 63 index to be penalised. Default is 0, all columns are included. 64 """ 65 super(TotalVariation, self).__init__(l, A=A, mu=mu, 66 penalty_start=penalty_start) 67 self._p = A[0].shape[1] 68 69 self.c = float(c) 70 71 self.reset()

72

73 - def reset(self):

74 75 self._lambda_max = None

76

77 - def f(self, beta):

78 """ Function value. 79 """ 80 if self.l < consts.TOLERANCE: 81 return 0.0 82 83 if self.penalty_start > 0: 84 beta_ = beta[self.penalty_start:, :] 85 else: 86 beta_ = beta 87 88 A = self.A() 89 abeta2 = A[0].dot(beta_) ** 2.0 90 for k in xrange(1, len(A)): 91 abeta2 += A[k].dot(beta_) ** 2 92 93 return self.l * (np.sum(np.sqrt(abeta2)) - self.c)

94 # 95 # return self.l * (np.sum(np.sqrt(A[0].dot(beta_) ** 2.0 + 96 # A[1].dot(beta_) ** 2.0 + 97 # A[2].dot(beta_) ** 2.0)) - self.c) 98

99 - def phi(self, alpha, beta):

100 """Function value with known alpha. 101 102 From the interface "NesterovFunction". 103 """ 104 if self.l < consts.TOLERANCE: 105 return 0.0 106 107 if self.penalty_start > 0: 108 beta_ = beta[self.penalty_start:, :] 109 else: 110 beta_ = beta 111 112 Aa = self.Aa(alpha) 113 114 alpha_sqsum = 0.0 115 for a in alpha: 116 alpha_sqsum += np.sum(a ** 2.0) 117 118 mu = self.get_mu() 119 120 return self.l * ((np.dot(beta_.T, Aa)[0, 0] 121 - (mu / 2.0) * alpha_sqsum) - self.c)

122

123 - def feasible(self, beta):

124 """Feasibility of the constraint. 125 126 From the interface "Constraint". 127 """ 128 if self.penalty_start > 0: 129 beta_ = beta[self.penalty_start:, :] 130 else: 131 beta_ = beta 132 133 A = self.A() 134 abeta2 = A[0].dot(beta_) ** 2.0 135 for k in xrange(1, len(A)): 136 abeta2 += A[k].dot(beta_) ** 2 137 val = np.sum(np.sqrt(abeta2)) 138 # val = np.sum(np.sqrt(A[0].dot(beta_) ** 2.0 + 139 # A[1].dot(beta_) ** 2.0 + 140 # A[2].dot(beta_) ** 2.0)) 141 return val <= self.c

142

143 - def L(self):

144 """ Lipschitz constant of the gradient. 145 146 From the interface "LipschitzContinuousGradient". 147 """ 148 if self.l < consts.TOLERANCE: 149 return 0.0 150 151 lmaxA = self.lambda_max() 152 153 return self.l * lmaxA / self.mu

154

155 - def lambda_max(self):

156 """ Largest eigenvalue of the corresponding covariance matrix. 157 158 From the interface "Eigenvalues". 159 """ 160 # Note that we can save the state here since lmax(A) does not change. 161 # TODO: This only work if the elements of self._A are scipy.sparse. We 162 # should allow dense matrices as well. 163 if len(self._A) == 3 \ 164 and self._A[1].nnz == 0 and self._A[2].nnz == 0: 165 # TODO: Instead of p, this should really be the number of non-zero 166 # rows of A. 167 self._lambda_max = 2.0 * (1.0 - math.cos(float(self._p - 1) 168 * math.pi 169 / float(self._p))) 170 171 elif self._lambda_max is None: 172 173 from parsimony.algorithms.nipals import FastSparseSVD 174 175 A = sparse.vstack(self.A()) 176 # TODO: Add max_iter here! 177 v = FastSparseSVD().run(A) # , max_iter=max_iter) 178 us = A.dot(v) 179 self._lambda_max = np.sum(us ** 2.0) 180 181 return self._lambda_max

182 183 # """ Linear operator of the Nesterov function. 184 # 185 # From the interface "NesterovFunction". 186 # """ 187 # def A(self): 188 # 189 # return self._A 190 191 # """ Computes A^\T\alpha. 192 # 193 # From the interface "NesterovFunction". 194 # """ 195 # def Aa(self, alpha): 196 # 197 # A = self.A() 198 # Aa = A[0].T.dot(alpha[0]) 199 # for i in xrange(1, len(A)): 200 # Aa += A[i].T.dot(alpha[i]) 201 # 202 # return Aa 203 204 # """ Dual variable of the Nesterov function. 205 # 206 # From the interface "NesterovFunction". 207 # """ 208 # def alpha(self, beta): 209 # 210 # # Compute a* 211 # A = self.A() 212 # alpha = [0] * len(A) 213 # for i in xrange(len(A)): 214 # alpha[i] = A[i].dot(beta) / self.mu 215 # 216 # # Apply projection 217 # alpha = self.project(alpha) 218 # 219 # return alpha 220 221 # old version limited to tv with 3 A matrices 222 # def project(self, a): 223 # """ Projection onto the compact space of the Nesterov function. 224 # 225 # From the interface "NesterovFunction". 226 # """ 227 # import pickle 228 # pickle.dump(a, open("/tmp/a.pkl", "wb")) 229 # ax = a[0] 230 # ay = a[1] 231 # az = a[2] 232 # anorm = ax ** 2.0 + ay ** 2.0 + az ** 2.0 233 # i = anorm > 1.0 234 # 235 # anorm_i = anorm[i] ** 0.5 # Square root is taken here. Faster. 236 # ax[i] = np.divide(ax[i], anorm_i) 237 # ay[i] = np.divide(ay[i], anorm_i) 238 # az[i] = np.divide(az[i], anorm_i) 239 # 240 # return [ax, ay, az] 241

242 - def project(self, a):

243 """ Projection onto the compact space of the Nesterov function. 244 245 From the interface "NesterovFunction". 246 """ 247 anorm = a[0] ** 2.0 248 for k in xrange(1, len(a)): 249 anorm += a[k] ** 2 250 i = anorm > 1.0 251 252 anorm_i = anorm[i] ** 0.5 # Square root is taken here. Faster. 253 for k in xrange(len(a)): 254 a[k][i] = np.divide(a[k][i], anorm_i) 255 256 return a

257

258 - def M(self):

259 """ The maximum value of the regularisation of the dual variable. We 260 have 261 262 M = max_{alpha in K} 0.5*|alpha|²_2. 263 264 From the interface "NesterovFunction". 265 """ 266 return self._A[0].shape[0] / 2.0

267

268 - def estimate_mu(self, beta):

269 """ Computes a "good" value of mu with respect to the given beta. 270 271 From the interface "NesterovFunction". 272 """ 273 if self.penalty_start > 0: 274 beta_ = beta[self.penalty_start:, :] 275 else: 276 beta_ = beta 277 278 A = self.A() 279 normAg = np.zeros((A[0].shape[0], 1)) 280 for Ai in A: 281 normAg += Ai.dot(beta_) ** 2.0 282 normAg = np.sqrt(normAg) 283 mu = np.max(normAg) 284 285 return mu

286

287 288 @utils.deprecated("linear_operator_from_mask") 289 -def A_from_mask(*args, **kwargs):

290 291 return linear_operator_from_mask(*args, **kwargs)

292

293 294 -def linear_operator_from_mask(mask, offset=0, weights=None):

295 """Generates the linear operator for the total variation Nesterov function 296 from a mask for a 3D image. 297 298 Parameters 299 ---------- 300 mask : Numpy array of integers. The mask has the same shape as the original 301 data. Non-null values correspond to columns of X. Groups may be 302 defined using different values in the mask. TV will be applied 303 within groups of the same value in the mask. 304 305 offset: Non-negative integer. The index of the first column, variable, 306 where TV applies. This is different from penalty_start which 307 define where the penalty applies. The offset defines where TV 308 applies within the penalised variables. 309 310 Example: X := [Intercept, Age, Weight, Image]. Intercept is 311 not penalized, TV does not apply on Age and Weight but only on 312 Image. Thus: penalty_start = 1, offset = 2 (skip Age and 313 Weight). 314 315 weights : Numpy array. The weight put on the gradient of every point. 316 Default is weight 1 for each point, or equivalently, no weight. The 317 weights is a numpy array of the same shape as mask. 318 """ 319 while len(mask.shape) < 3: 320 mask = mask[..., np.newaxis] 321 322 if weights is not None: 323 while len(weights.shape) < 3: 324 weights = weights[..., np.newaxis] 325 326 nx, ny, nz = mask.shape 327 mask_bool = mask != 0 328 xyz_mask = np.where(mask_bool) 329 Ax_i = list() 330 Ax_j = list() 331 Ax_v = list() 332 Ay_i = list() 333 Ay_j = list() 334 Ay_v = list() 335 Az_i = list() 336 Az_j = list() 337 Az_v = list() 338 n_compacts = 0 339 p = np.sum(mask_bool) + offset 340 341 # Mapping from image coordinate to flat masked array. 342 im2flat = np.zeros(mask.shape, dtype=int) 343 im2flat[:] = -1 344 im2flat[mask_bool] = np.arange(np.sum(mask_bool)) + offset 345 346 for pt in xrange(len(xyz_mask[0])): 347 348 found = False 349 x, y, z = xyz_mask[0][pt], xyz_mask[1][pt], xyz_mask[2][pt] 350 i_pt = im2flat[x, y, z] 351 val = mask[x, y, z] 352 353 if weights is not None: 354 w = weights[x, y, z] 355 else: 356 w = 1.0 357 358 if x + 1 < nx and (mask[x + 1, y, z] == val): 359 found = True 360 Ax_i += [i_pt, i_pt] 361 Ax_j += [i_pt, im2flat[x + 1, y, z]] 362 Ax_v += [-w, w] 363 if y + 1 < ny and (mask[x, y + 1, z] == val): 364 found = True 365 Ay_i += [i_pt, i_pt] 366 Ay_j += [i_pt, im2flat[x, y + 1, z]] 367 Ay_v += [-w, w] 368 if z + 1 < nz and (mask[x, y, z + 1] == val): 369 found = True 370 Az_i += [i_pt, i_pt] 371 Az_j += [i_pt, im2flat[x, y, z + 1]] 372 Az_v += [-w, w] 373 374 if found: 375 n_compacts += 1 376 377 Ax = sparse.csr_matrix((Ax_v, (Ax_i, Ax_j)), shape=(p, p)) 378 Ay = sparse.csr_matrix((Ay_v, (Ay_i, Ay_j)), shape=(p, p)) 379 Az = sparse.csr_matrix((Az_v, (Az_i, Az_j)), shape=(p, p)) 380 381 return [Ax, Ay, Az], n_compacts

382

383 384 @utils.deprecated("linear_operator_from_subset_mask") 385 -def A_from_subset_mask(*args, **kwargs):

386 387 return linear_operator_from_subset_mask(*args, **kwargs)

388

389 390 -def linear_operator_from_subset_mask(mask, weights=None):

391 """Generates the linear operator for the total variation Nesterov function 392 from a mask for a 3D image. 393 394 The binary mask marks a subset of the variables that are supposed to be 395 smoothed. The mask has the same size as the input and output image. 396 397 Parameters 398 ---------- 399 mask : Numpy array. The mask. The mask does not involve any intercept 400 variables. 401 402 weights : Numpy array. The weight put on the gradient of every point. 403 Default is weight 1 for each point, or equivalently, no weight. The 404 weights is a numpy array of the same shape as mask. 405 """ 406 while len(mask.shape) < 3: 407 mask = mask[np.newaxis, :] 408 409 if weights is not None: 410 while len(weights.shape) < 3: 411 weights = weights[np.newaxis, :] 412 413 nz, ny, nx = mask.shape 414 mask = mask.astype(bool) 415 zyx_mask = np.where(mask) 416 Ax_i = list() 417 Ax_j = list() 418 Ax_v = list() 419 Ay_i = list() 420 Ay_j = list() 421 Ay_v = list() 422 Az_i = list() 423 Az_j = list() 424 Az_v = list() 425 num_compacts = 0 426 # p = np.sum(mask) 427 428 # Mapping from image coordinate to flat masked array. 429 def im2flat(sub, dims): 430 return sub[0] * dims[2] * dims[1] + \ 431 sub[1] * dims[2] + \ 432 sub[2]

433 # im2flat = np.zeros(mask.shape, dtype=int) 434 # im2flat[:] = -1 435 # im2flat[mask] = np.arange(p) 436 # im2flat[np.arange(p)] = np.arange(p) 437 438 for pt in xrange(len(zyx_mask[0])): 439 440 found = False 441 z, y, x = zyx_mask[0][pt], zyx_mask[1][pt], zyx_mask[2][pt] 442 i_pt = im2flat((z, y, x), mask.shape) 443 444 if weights is not None: 445 w = weights[z, y, x] 446 else: 447 w = 1.0 448 449 if z + 1 < nz and mask[z + 1, y, x]: 450 found = True 451 Az_i += [i_pt, i_pt] 452 Az_j += [i_pt, im2flat((z + 1, y, x), mask.shape)] 453 Az_v += [-w, w] 454 if y + 1 < ny and mask[z, y + 1, x]: 455 found = True 456 Ay_i += [i_pt, i_pt] 457 Ay_j += [i_pt, im2flat((z, y + 1, x), mask.shape)] 458 Ay_v += [-w, w] 459 if x + 1 < nx and mask[z, y, x + 1]: 460 found = True 461 Ax_i += [i_pt, i_pt] 462 Ax_j += [i_pt, im2flat((z, y, x + 1), mask.shape)] 463 Ax_v += [-w, w] 464 465 if found: 466 num_compacts += 1 467 468 p = np.prod(mask.shape) 469 Az = sparse.csr_matrix((Az_v, (Az_i, Az_j)), shape=(p, p)) 470 Ay = sparse.csr_matrix((Ay_v, (Ay_i, Ay_j)), shape=(p, p)) 471 Ax = sparse.csr_matrix((Ax_v, (Ax_i, Ax_j)), shape=(p, p)) 472 473 return [Ax, Ay, Az], num_compacts 474

475 476 @utils.deprecated("linear_operator_from_shape") 477 -def A_from_shape(*args, **kwargs):

478 479 return linear_operator_from_shape(*args, **kwargs)

480

481 482 -def linear_operator_from_shape(shape, weights=None):

483 """Generates the linear operator for the total variation Nesterov function 484 from the shape of a 1D, 2D or 3D image. 485 486 Parameters 487 ---------- 488 shape : List or tuple with 1, 2 or 3 integers. The shape of the 1D, 2D or 489 3D image. shape has the form X, (X,), (Y, X) or (Z, Y, X), where Z 490 is the number of "layers", Y is the number of rows and X is the 491 number of columns. The shape does not involve any intercept 492 variables. 493 494 weights : Sequence, e.g. list or numpy (p-by-1) array. Weights put on the 495 groups. Default is weight 1 for each group, i.e. no weight. 496 """ 497 if not isinstance(shape, (list, tuple)): 498 shape = [shape] 499 while len(shape) < 3: 500 shape = tuple([1] + list(shape)) 501 502 nz = shape[0] 503 ny = shape[1] 504 nx = shape[2] 505 p = nx * ny * nz 506 ind = np.arange(p).reshape((nz, ny, nx)) 507 508 if weights is not None: 509 weights = np.array(weights) 510 weights = weights.ravel() 511 # w = sparse.spdiags(weights.ravel(), 0, p, p) 512 513 if nx > 1: 514 if weights is not None: 515 Ax = sparse.spdiags(weights, -1, p, p).T - \ 516 sparse.spdiags(weights, 0, p, p) 517 Ax = Ax.tocsr() 518 else: 519 Ax = sparse.eye(p, p, 1, format='csr') - \ 520 sparse.eye(p, p) 521 zind = ind[:, :, -1].ravel() 522 for i in zind: 523 Ax.data[Ax.indptr[i]: \ 524 Ax.indptr[i + 1]] = 0 525 Ax.eliminate_zeros() 526 else: 527 Ax = sparse.csr_matrix((p, p), dtype=float) 528 529 if ny > 1: 530 if weights is not None: 531 Ay = sparse.spdiags(weights, -nx, p, p).T - \ 532 sparse.spdiags(weights, 0, p, p) 533 Ay = Ay.tocsr() 534 else: 535 Ay = sparse.eye(p, p, nx, format='csr') - \ 536 sparse.eye(p, p) 537 538 yind = ind[:, -1, :].ravel() 539 for i in yind: 540 Ay.data[Ay.indptr[i]: \ 541 Ay.indptr[i + 1]] = 0 542 Ay.eliminate_zeros() 543 else: 544 Ay = sparse.csr_matrix((p, p), dtype=float) 545 546 if nz > 1: 547 if weights is not None: 548 Az = sparse.spdiags(weights, -(ny * nx), p, p).T - \ 549 sparse.spdiags(weights, 0, p, p) 550 Az = Az.tocsr() 551 else: 552 Az = (sparse.eye(p, p, ny * nx, format='csr') - \ 553 sparse.eye(p, p)) 554 555 xind = ind[-1, :, :].ravel() 556 for i in xind: 557 Az.data[Az.indptr[i]: \ 558 Az.indptr[i + 1]] = 0 559 Az.eliminate_zeros() 560 else: 561 Az = sparse.csr_matrix((p, p), dtype=float) 562 563 return [Ax, Ay, Az], (nz * ny * nx - 1)

564

565 566 @utils.deprecated("linear_operator_from_mesh") 567 -def A_from_mesh(*args, **kwargs):

568 569 return linear_operator_from_mesh(*args, **kwargs)

570

571 572 @utils.deprecated("linear_operator_from_mesh") 573 -def nesterov_linear_operator_from_mesh(*args, **kwargs):

574 575 return linear_operator_from_mesh(*args, **kwargs)

576

577 578 -def linear_operator_from_mesh(mesh_coord, mesh_triangles, mask=None, offset=0, 579 weights=None):

580 """Generates the linear operator for the total variation Nesterov function 581 from a mesh. 582 583 Parameters 584 ---------- 585 mesh_coord : Numpy array [n, 3] of float. 586 587 mesh_triangles : Numpy array, n_triangles-by-3. The (integer) indices of 588 the three nodes forming the triangle. 589 590 mask : Numpy array (shape (n,)) of integers/boolean. Non-null values 591 correspond to columns of X. Groups may be defined using different 592 values in the mask. TV will be applied within groups of the same 593 value in the mask. 594 595 offset : Non-negative integer. The index of the first column, variable, 596 where TV applies. This is different from penalty_start which 597 define where the penalty applies. The offset defines where TV 598 applies within the penalised variables. 599 600 Example: X := [Intercept, Age, Weight, Image]. Intercept is 601 not penalized, TV does not apply on Age and Weight but only on 602 Image. Thus: penalty_start = 1, offset = 2 (skip Age and 603 Weight). 604 605 weights : Numpy array. The weight put on the gradient of every point. 606 Default is weight 1 for each point, or equivalently, no weight. The 607 weights is a numpy array of the same shape as mask. 608 609 Returns 610 ------- 611 out1 : List or sparse matrices. Linear operator for the total variation 612 Nesterov function computed over a mesh. 613 614 out2 : Integer. The number of compacts. 615 616 Examples 617 -------- 618 >>> import numpy as np 619 >>> import parsimony.functions.nesterov.tv as tv_helper 620 >>> mesh_coord = np.array([[0, 0], [1, 0], [0, 1], [1, 1], [0, 2], [1, 2]]) 621 >>> mesh_triangles = np.array([[0 ,1, 3], [0, 2 ,3], [2, 3, 5], [2, 4, 5]]) 622 >>> A, _ = tv_helper.nesterov_linear_operator_from_mesh(mesh_coord, 623 ... mesh_triangles) 624 """ 625 if mask is None: 626 mask = np.ones(mesh_coord.shape[0], dtype=bool) 627 assert mask.shape[0] == mesh_coord.shape[0] 628 mask_bool = mask != 0 629 mask_idx = np.where(mask_bool)[0] 630 # Mapping from full array to masked array. 631 map_full2masked = np.zeros(mask.shape, dtype=int) 632 map_full2masked[:] = -1 633 map_full2masked[mask_bool] = np.arange(np.sum(mask_bool)) + offset 634 ## 1) Associate edges to nodes 635 nodes_with_edges = [[] for i in xrange(mesh_coord.shape[0])] 636 637 def connect_edge_to_node(node_idx1, node_idx2, nodes_with_edges): 638 # Attach edge to first node. 639 if np.sum(mesh_coord[node_idx1] - mesh_coord[node_idx2]) >= 0: 640 edge = [node_idx1, node_idx2] 641 if not edge in nodes_with_edges[node_idx1]: 642 nodes_with_edges[node_idx1].append(edge) 643 else: # attach edge to second node 644 edge = [node_idx2, node_idx1] 645 if not edge in nodes_with_edges[node_idx2]: 646 nodes_with_edges[node_idx2].append(edge)

647 for i in xrange(mesh_triangles.shape[0]): 648 t = mesh_triangles[i, :] 649 connect_edge_to_node(t[0], t[1], nodes_with_edges) 650 connect_edge_to_node(t[0], t[2], nodes_with_edges) 651 connect_edge_to_node(t[1], t[2], nodes_with_edges) 652 max_connectivity = np.max(np.array([len(n) for n in nodes_with_edges])) 653 # 3. build sparse matrices 654 # 1..max_connectivity of i, j and value 655 A = [[[], [], []] for i in xrange(max_connectivity)] 656 n_compacts = 0 657 for node_idx in mask_idx: 658 #node_idx = 0 659 found = False 660 node = nodes_with_edges[node_idx] 661 for i, v in enumerate(node): 662 found = False 663 if weights is not None: 664 w = weights[i] 665 else: 666 w = 1.0 667 #print i, v 668 node1_idx, node2_idx = v 669 if mask_bool[node1_idx] and mask_bool[node2_idx]: 670 found = True 671 A[i][0] += [map_full2masked[node1_idx], 672 map_full2masked[node1_idx]] 673 A[i][1] += [map_full2masked[node1_idx], 674 map_full2masked[node2_idx]] 675 A[i][2] += [-w, w] 676 if found: 677 n_compacts += 1 678 p = mask.sum() 679 A = [sparse.csr_matrix((A[i][2], (A[i][0], A[i][1])), 680 shape=(p, p)) for i in xrange(len(A))] 681 682 return A, n_compacts 683

Source Code for Module parsimony.functions.nesterov.tv