parsimony.algorithms.proximal

1 # -*- coding: utf-8 -*- 2 """ 3 The :mod:`parsimony.algorithms.proximal` module contains several algorithms 4 that involve proximal operators. 5 6 Algorithms may not store states. I.e., if they are classes, do not keep 7 references to objects with state in the algorithm objects. It should be 8 possible to copy and share algorithms between e.g. estimators, and thus they 9 should not depend on any state. 10 11 Created on Mon Jun 2 15:42:13 2014 12 13 Copyright (c) 2013-2014, CEA/DSV/I2BM/Neurospin. All rights reserved. 14 15 @author: Tommy Löfstedt, Edouard Duchesnay, Fouad Hadj-Selem 16 @email: lofstedt.tommy@gmail.com, edouard.duchesnay@cea.fr, 17 fouad.hadjselem@cea.fr 18 @license: BSD 3-clause. 19 """ 20 import numpy as np 21 try: 22 from scipy.interpolate import PchipInterpolator as interp1 23 except ImportError: 24 from scipy.interpolate import interp1d as interp1 25 26 try: 27 from . import bases # Only works when imported as a package. 28 except ValueError: 29 import parsimony.algorithms.bases as bases # When run as a program. 30 import parsimony.utils as utils 31 import parsimony.utils.maths as maths 32 import parsimony.utils.consts as consts 33 from parsimony.algorithms.utils import Info 34 import parsimony.functions.properties as properties 35 36 __all__ = ["ISTA", "FISTA", "CONESTA", "StaticCONESTA", 37 "ADMM", 38 39 "DykstrasProjectionAlgorithm", 40 "ParallelDykstrasProjectionAlgorithm"]

41 42 43 -class ISTA(bases.ExplicitAlgorithm, 44 bases.IterativeAlgorithm, 45 bases.InformationAlgorithm):

46 """The iterative shrinkage-thresholding algorithm. 47 48 Parameters 49 ---------- 50 eps : Positive float. Tolerance for the stopping criterion. 51 52 info : List or tuple of utils.consts.Info. What, if any, extra run 53 information should be stored. Default is an empty list, which 54 means that no run information is computed nor returned. 55 56 max_iter : Non-negative integer. Maximum allowed number of iterations. 57 58 min_iter : Non-negative integer less than or equal to max_iter. Minimum 59 number of iterations that must be performed. Default is 1. 60 61 Examples 62 -------- 63 >>> from parsimony.algorithms.proximal import ISTA 64 >>> from parsimony.functions import LinearRegressionL1L2TV 65 >>> import scipy.sparse as sparse 66 >>> import numpy as np 67 >>> 68 >>> np.random.seed(42) 69 >>> X = np.random.rand(100, 50) 70 >>> y = np.random.rand(100, 1) 71 >>> A = sparse.csr_matrix((50, 50)) # Unused here 72 >>> function = LinearRegressionL1L2TV(X, y, 0.0, 0.0, 0.0, 73 ... A=A, mu=0.0) 74 >>> ista = ISTA(max_iter=10000) 75 >>> beta1 = ista.run(function, np.random.rand(50, 1)) 76 >>> beta2 = np.dot(np.linalg.pinv(X), y) 77 >>> round(np.linalg.norm(beta1 - beta2), 13) 78 0.0003121557633 79 >>> 80 >>> np.random.seed(42) 81 >>> X = np.random.rand(100, 50) 82 >>> y = np.random.rand(100, 1) 83 >>> A = sparse.csr_matrix((50, 50)) # Unused here 84 >>> function = LinearRegressionL1L2TV(X, y, 0.1, 0.0, 0.0, 85 ... A=A, mu=0.0) 86 >>> ista = ISTA(max_iter=10000) 87 >>> beta1 = ista.run(function, np.random.rand(50, 1)) 88 >>> beta2 = np.dot(np.linalg.pinv(X), y) 89 >>> round(np.linalg.norm(beta1 - beta2), 13) 90 0.8272330310458 91 >>> np.linalg.norm(beta2.ravel(), 0) 92 50 93 >>> np.linalg.norm(beta1.ravel(), 0) 94 7 95 """ 96 INTERFACES = [properties.Function, 97 properties.Gradient, 98 properties.StepSize, 99 properties.ProximalOperator] 100 101 INFO_PROVIDED = [Info.ok, 102 Info.num_iter, 103 Info.time, 104 Info.fvalue, 105 Info.converged] 106

107 - def __init__(self, eps=consts.TOLERANCE, 108 info=[], max_iter=20000, min_iter=1):

109 110 super(ISTA, self).__init__(info=info, 111 max_iter=max_iter, 112 min_iter=min_iter) 113 self.eps = eps

114 115 @bases.force_reset 116 @bases.check_compatibility

117 - def run(self, function, beta):

118 """Find the minimiser of the given function, starting at beta. 119 120 Parameters 121 ---------- 122 function : Function. The function to minimise. 123 124 beta : Numpy array. The start vector. 125 """ 126 if self.info_requested(Info.ok): 127 self.info_set(Info.ok, False) 128 129 step = function.step(beta) 130 131 betanew = betaold = beta 132 133 if self.info_requested(Info.time): 134 t = [] 135 if self.info_requested(Info.fvalue): 136 f = [] 137 if self.info_requested(Info.converged): 138 self.info_set(Info.converged, False) 139 140 for i in xrange(1, self.max_iter + 1): 141 142 if self.info_requested(Info.time): 143 tm = utils.time_cpu() 144 145 step = function.step(betanew) 146 147 betaold = betanew 148 betanew = function.prox(betaold - step * function.grad(betaold), 149 step, 150 eps=1.0 / (float(i) ** (2.0 + consts.FLOAT_EPSILON)), 151 max_iter=self.max_iter) 152 153 if self.info_requested(Info.time): 154 t.append(utils.time_cpu() - tm) 155 if self.info_requested(Info.fvalue): 156 f.append(function.f(betanew)) 157 158 if (1.0 / step) * maths.norm(betanew - betaold) < self.eps \ 159 and i >= self.min_iter: 160 161 if self.info_requested(Info.converged): 162 self.info_set(Info.converged, True) 163 164 break 165 166 self.num_iter = i 167 168 if self.info_requested(Info.num_iter): 169 self.info_set(Info.num_iter, i) 170 if self.info_requested(Info.time): 171 self.info_set(Info.time, t) 172 if self.info_requested(Info.fvalue): 173 self.info_set(Info.fvalue, f) 174 if self.info_requested(Info.ok): 175 self.info_set(Info.ok, True) 176 177 return betanew

178

179 180 -class FISTA(bases.ExplicitAlgorithm, 181 bases.IterativeAlgorithm, 182 bases.InformationAlgorithm):

183 """ The fast iterative shrinkage-thresholding algorithm. 184 185 Parameters 186 ---------- 187 eps : Positive float. Tolerance for the stopping criterion. 188 189 use_gap : Boolean. If true, FISTA will use a dual gap, from the interface 190 DualFunction, in the stopping criterion as 191 192 if function.gap(beta) < eps: 193 break 194 195 Default is False, since the gap may be very expensive to compute. 196 197 info : List or tuple of utils.consts.Info. What, if any, extra run 198 information should be stored. Default is an empty list, which means 199 that no run information is computed nor returned. 200 201 max_iter : Non-negative integer. Maximum allowed number of iterations. 202 203 min_iter : Non-negative integer less than or equal to max_iter. Minimum 204 number of iterations that must be performed. Default is 1. 205 206 Example 207 ------- 208 >>> from parsimony.algorithms.proximal import FISTA 209 >>> from parsimony.functions import LinearRegressionL1L2TV 210 >>> import scipy.sparse as sparse 211 >>> import numpy as np 212 >>> 213 >>> np.random.seed(42) 214 >>> X = np.random.rand(100, 50) 215 >>> y = np.random.rand(100, 1) 216 >>> A = sparse.csr_matrix((50, 50)) # Unused here 217 >>> function = LinearRegressionL1L2TV(X, y, 0.0, 0.0, 0.0, 218 ... A=A, mu=0.0) 219 >>> fista = FISTA(max_iter=10000) 220 >>> beta1 = fista.run(function, np.random.rand(50, 1)) 221 >>> beta2 = np.dot(np.linalg.pinv(X), y) 222 >>> round(np.linalg.norm(beta1 - beta2), 13) 223 4.6182817e-06 224 >>> 225 >>> np.random.seed(42) 226 >>> X = np.random.rand(100, 50) 227 >>> y = np.random.rand(100, 1) 228 >>> A = sparse.csr_matrix((50, 50)) # Unused here 229 >>> function = LinearRegressionL1L2TV(X, y, 0.1, 0.0, 0.0, 230 ... A=A, mu=0.0) 231 >>> fista = FISTA(max_iter=10000) 232 >>> beta1 = fista.run(function, np.random.rand(50, 1)) 233 >>> beta2 = np.dot(np.linalg.pinv(X), y) 234 >>> round(np.linalg.norm(beta1 - beta2), 14) 235 0.82723292510703 236 >>> np.linalg.norm(beta2.ravel(), 0) 237 50 238 >>> np.linalg.norm(beta1.ravel(), 0) 239 7 240 """ 241 INTERFACES = [properties.Function, 242 properties.Gradient, 243 properties.StepSize, 244 properties.ProximalOperator] 245 246 INFO_PROVIDED = [Info.ok, 247 Info.num_iter, 248 Info.time, 249 Info.fvalue, 250 Info.converged, 251 Info.gap] 252

253 - def __init__(self, use_gap=False, 254 info=[], eps=consts.TOLERANCE, max_iter=10000, min_iter=1, 255 simulation=False):

256 257 super(FISTA, self).__init__(info=info, 258 max_iter=max_iter, 259 min_iter=min_iter) 260 261 self.use_gap = bool(use_gap) 262 self.eps = max(consts.FLOAT_EPSILON, float(eps)) 263 264 self.simulation = bool(simulation)

265 266 @bases.force_reset 267 @bases.check_compatibility

268 - def run(self, function, beta):

269 """Find the minimiser of the given function, starting at beta. 270 271 Parameters 272 ---------- 273 function : Function. The function to minimise. 274 275 beta : Numpy array. The start vector. 276 """ 277 if self.info_requested(Info.ok): 278 self.info_set(Info.ok, False) 279 280 z = betanew = betaold = beta 281 282 if self.info_requested(Info.time): 283 t_ = [] 284 if self.info_requested(Info.fvalue): 285 f_ = [] 286 if self.info_requested(Info.converged): 287 self.info_set(Info.converged, False) 288 if self.info_requested(Info.gap): 289 gap_ = [] 290 291 for i in xrange(1, max(self.min_iter, self.max_iter) + 1): 292 293 if self.info_requested(Info.time): 294 tm = utils.time_cpu() 295 296 z = betanew + ((i - 2.0) / (i + 1.0)) * (betanew - betaold) 297 298 step = function.step(z) 299 300 betaold = betanew 301 betanew = function.prox(z - step * function.grad(z), 302 step, 303 eps=1.0 / (float(i) ** (4.0 + consts.FLOAT_EPSILON)), 304 max_iter=self.max_iter) 305 306 if self.info_requested(Info.time): 307 t_.append(utils.time_cpu() - tm) 308 if self.info_requested(Info.fvalue): 309 f_.append(function.f(betanew)) 310 311 if self.use_gap: 312 313 gap = function.gap(betanew, 314 eps=self.eps, 315 max_iter=self.max_iter) 316 317 # TODO: Warn if G_new < -consts.TOLERANCE. 318 gap = abs(gap) # May happen close to machine epsilon. 319 if self.info_requested(Info.gap): 320 gap_.append(gap) 321 322 if not self.simulation: 323 if gap < self.eps: 324 if self.info_requested(Info.converged): 325 self.info_set(Info.converged, True) 326 327 break 328 else: 329 if not self.simulation: 330 if step > 0.0: 331 if (1.0 / step) * maths.norm(betanew - z) < self.eps \ 332 and i >= self.min_iter: 333 334 if self.info_requested(Info.converged): 335 self.info_set(Info.converged, True) 336 337 break 338 339 else: # TODO: Fix this! 340 if maths.norm(betanew - z) < self.eps \ 341 and i >= self.min_iter: 342 343 if self.info_requested(Info.converged): 344 self.info_set(Info.converged, True) 345 346 break 347 348 self.num_iter = i 349 350 if self.info_requested(Info.num_iter): 351 self.info_set(Info.num_iter, i) 352 if self.info_requested(Info.time): 353 self.info_set(Info.time, t_) 354 if self.info_requested(Info.fvalue): 355 self.info_set(Info.fvalue, f_) 356 if self.info_requested(Info.gap): 357 self.info_set(Info.gap, gap_) 358 if self.info_requested(Info.ok): 359 self.info_set(Info.ok, True) 360 361 return betanew

362

363 364 -class CONESTA(bases.ExplicitAlgorithm, 365 bases.IterativeAlgorithm, 366 bases.InformationAlgorithm):

367 """COntinuation with NEsterov smoothing in a Soft-Thresholding Algorithm, 368 or CONESTA for short. 369 370 Parameters 371 ---------- 372 mu_min : Non-negative float. A "very small" mu to use as a lower bound for 373 mu. 374 375 tau : Float, 0 < tau < 1. The rate at which eps is decreasing. Default 376 is 0.5. 377 378 eps : Positive float. Tolerance for the stopping criterion. 379 380 info : List or tuple of utils.Info. What, if any, extra run information 381 should be stored. Default is an empty list, which means that no 382 run information is computed nor returned. 383 384 max_iter : Non-negative integer. Maximum allowed number of iterations. 385 386 min_iter : Non-negative integer less than or equal to max_iter. Minimum 387 number of iterations that must be performed. Default is 1. 388 """ 389 INTERFACES = [properties.NesterovFunction, 390 properties.StepSize, 391 properties.ProximalOperator, 392 properties.Continuation, 393 properties.DualFunction] 394 395 INFO_PROVIDED = [Info.ok, 396 Info.converged, 397 Info.num_iter, 398 Info.continuations, 399 Info.time, 400 Info.fvalue, 401 Info.gap, 402 Info.mu] 403

404 - def __init__(self, mu_min=consts.TOLERANCE, tau=0.5, 405 info=[], eps=consts.TOLERANCE, max_iter=10000, min_iter=1, 406 simulation=False):

407 408 super(CONESTA, self).__init__(info=info, 409 max_iter=max_iter, min_iter=min_iter) 410 411 self.mu_min = max(consts.FLOAT_EPSILON, float(mu_min)) 412 self.tau = max(consts.TOLERANCE, 413 min(float(tau), 1.0 - consts.TOLERANCE)) 414 self.eps = max(consts.TOLERANCE, float(eps)) 415 self.simulation = bool(simulation)

416 417 @bases.force_reset 418 @bases.check_compatibility

419 - def run(self, function, beta):

420 421 # Copy the allowed info keys for FISTA. 422 fista_info = list() 423 for nfo in self.info_copy(): 424 if nfo in FISTA.INFO_PROVIDED: 425 fista_info.append(nfo) 426 # CONESTA always asks for the gap. 427 if Info.gap not in fista_info: 428 fista_info.append(Info.gap) 429 430 # Create the inner algorithm. 431 algorithm = FISTA(use_gap=True, info=fista_info, eps=self.eps, 432 max_iter=self.max_iter, min_iter=self.min_iter) 433 434 # Not ok until the end. 435 if self.info_requested(Info.ok): 436 self.info_set(Info.ok, False) 437 438 # Time the init computation (essentialy Lipchitz constant in mu_opt). 439 if self.info_requested(Info.time): 440 init_time = utils.time_cpu() 441 442 # Compute current gap, precision eps (gap decreased by tau) and mu. 443 old_mu = function.set_mu(consts.TOLERANCE) 444 gap = function.gap(beta, eps=self.eps, max_iter=self.max_iter) 445 function.set_mu(old_mu) 446 # Obtain the gap from the last FISTA run. May be small and negative 447 # close to machine epsilon. 448 eps = self.tau * abs(gap) 449 # TODO: Warn if gap < -consts.TOLERANCE. 450 mu = function.mu_opt(eps) 451 function.set_mu(mu) 452 gM = function.eps_max(1.0) 453 454 # Initialise info variables. Info variables have the suffix "_". 455 if self.info_requested(Info.time): 456 t_ = [] 457 init_time = utils.time_cpu() - init_time 458 if self.info_requested(Info.fvalue): 459 f_ = [] 460 if self.info_requested(Info.gap): 461 gap_ = [] 462 if self.info_requested(Info.converged): 463 self.info_set(Info.converged, False) 464 if self.info_requested(Info.mu): 465 mu_ = [] 466 467 i = 0 # Iteration counter. 468 while True: 469 converged = False 470 471 # Current precision. 472 derived_eps = max(eps, self.eps) - mu * gM 473 474 # Set current parameters to algorithm. 475 algorithm.set_params(eps=derived_eps, 476 max_iter=self.max_iter - self.num_iter) 477 # Run FISTA. 478 beta = algorithm.run(function, beta) 479 480 # Update global iteration count. 481 self.num_iter += algorithm.num_iter 482 483 # Get info from algorithm. 484 if Info.time in algorithm.info and \ 485 self.info_requested(Info.time): 486 t_ += algorithm.info_get(Info.time) 487 if i == 0: # Add init time to first iteration. 488 t_[0] += init_time 489 if Info.fvalue in algorithm.info and \ 490 self.info_requested(Info.fvalue): 491 f_ += algorithm.info_get(Info.fvalue) 492 if self.info_requested(Info.mu): 493 mu_ += [mu] * algorithm.num_iter 494 if self.info_requested(Info.gap): 495 gap_ += algorithm.info_get(Info.gap) 496 497 #print gap, derived_eps, eps, mu, self.tau, self.num_iter 498 499 # Obtain the gap from the last FISTA run. May be small and negative 500 # close to machine epsilon. 501 gap = abs(algorithm.info_get(Info.gap)[-1]) 502 # TODO: Warn if gap < -consts.TOLERANCE. 503 504 if not self.simulation: 505 if gap < self.eps - mu * gM: 506 507 if self.info_requested(Info.converged): 508 self.info_set(Info.converged, True) 509 510 converged = True 511 512 # Stopping criteria. 513 if (converged or self.num_iter >= self.max_iter) \ 514 and self.num_iter >= self.min_iter: 515 break 516 517 # Update the precision eps. 518 # eps = self.tau * (gap + mu * gM) 519 eps = max(self.eps, self.tau * (gap + mu * gM)) 520 # Compute and update mu. 521 mu = max(self.mu_min, min(function.mu_opt(eps), mu)) 522 function.set_mu(mu) 523 524 i = i + 1 525 526 if self.info_requested(Info.num_iter): 527 self.info_set(Info.num_iter, self.num_iter) 528 if self.info_requested(Info.continuations): 529 self.info_set(Info.continuations, i + 1) 530 if self.info_requested(Info.time): 531 self.info_set(Info.time, t_) 532 if self.info_requested(Info.fvalue): 533 self.info_set(Info.fvalue, f_) 534 if self.info_requested(Info.gap): 535 self.info_set(Info.gap, gap_) 536 if self.info_requested(Info.mu): 537 self.info_set(Info.mu, mu_) 538 if self.info_requested(Info.ok): 539 self.info_set(Info.ok, True) 540 541 return beta

542

543 544 -class StaticCONESTA(bases.ExplicitAlgorithm, 545 bases.IterativeAlgorithm, 546 bases.InformationAlgorithm):

547 """COntinuation with NEsterov smoothing in a Soft-Thresholding Algorithm, 548 or CONESTA for short, with a statically decreasing sequence of eps and mu. 549 550 Parameters 551 ---------- 552 mu_min : Non-negative float. A "very small" mu to use as a lower bound for 553 mu. 554 555 tau : Float, 0 < tau < 1. The rate at which eps is decreasing. Default 556 is 0.5. 557 558 exponent : Float, in [1.001, 2.0]. The assumed convergence rate of 559 ||beta* - beta_k||_2 for k=1,2,... is O(1 / k^exponent). Default 560 is 1.5. 561 562 eps : Positive float. Tolerance for the stopping criterion. 563 564 info : List or tuple of utils.Info. What, if any, extra run information 565 should be stored. Default is an empty list, which means that no 566 run information is computed nor returned. 567 568 max_iter : Non-negative integer. Maximum allowed number of iterations. 569 570 min_iter : Non-negative integer less than or equal to max_iter. Minimum 571 number of iterations that must be performed. Default is 1. 572 573 Example 574 ------- 575 >>> from parsimony.algorithms.proximal import StaticCONESTA 576 >>> from parsimony.functions.nesterov import l1tv 577 >>> from parsimony.functions import LinearRegressionL1L2TV 578 >>> import scipy.sparse as sparse 579 >>> import numpy as np 580 >>> 581 >>> np.random.seed(42) 582 >>> X = np.random.rand(100, 50) 583 >>> y = np.random.rand(100, 1) 584 >>> A = sparse.csr_matrix((50, 50)) # Unused here 585 >>> function = LinearRegressionL1L2TV(X, y, 0.0, 0.0, 0.0, 586 ... A=[A], mu=0.0) 587 >>> static_conesta = StaticCONESTA(max_iter=10000) 588 >>> beta1 = static_conesta.run(function, np.random.rand(50, 1)) 589 >>> beta2 = np.dot(np.linalg.pinv(X), y) 590 >>> round(np.linalg.norm(beta1 - beta2), 13) 591 3.0183961e-06 592 >>> 593 >>> np.random.seed(42) 594 >>> X = np.random.rand(100, 50) 595 >>> y = np.random.rand(100, 1) 596 >>> A = sparse.csr_matrix((50, 50)) 597 >>> function = LinearRegressionL1L2TV(X, y, 0.1, 0.0, 0.0, 598 ... A=[A], mu=0.0) 599 >>> static_conesta = StaticCONESTA(max_iter=10000) 600 >>> beta1 = static_conesta.run(function, np.random.rand(50, 1)) 601 >>> beta2 = np.dot(np.linalg.pinv(X), y) 602 >>> round(np.linalg.norm(beta1 - beta2), 13) 603 0.8272329573827 604 >>> np.linalg.norm(beta2.ravel(), 0) 605 50 606 >>> np.linalg.norm(beta1.ravel(), 0) 607 7 608 >>> 609 >>> np.random.seed(42) 610 >>> X = np.random.rand(100, 50) 611 >>> y = np.random.rand(100, 1) 612 >>> A = l1tv.linear_operator_from_shape((1, 1, 50), 50) 613 >>> function = LinearRegressionL1L2TV(X, y, 0.1, 0.1, 0.1, 614 ... A=A, mu=0.0) 615 >>> static_conesta = StaticCONESTA(max_iter=10000) 616 >>> beta1 = static_conesta.run(function, np.zeros((50, 1))) 617 >>> beta2 = np.dot(np.linalg.pinv(X), y) 618 >>> round(np.linalg.norm(beta1 - beta2), 13) 619 0.9662907379987 620 """ 621 INTERFACES = [properties.NesterovFunction, 622 properties.StepSize, 623 properties.ProximalOperator, 624 properties.Continuation, 625 properties.DualFunction] 626 627 INFO_PROVIDED = [Info.ok, 628 Info.converged, 629 Info.num_iter, 630 Info.continuations, 631 Info.time, 632 Info.fvalue, 633 Info.func_val, 634 Info.mu] 635

636 - def __init__(self, mu_min=consts.TOLERANCE, tau=0.5, exponent=1.5, 637 info=[], eps=consts.TOLERANCE, max_iter=10000, min_iter=1, 638 simulation=False):

639 640 super(StaticCONESTA, self).__init__(info=info, 641 max_iter=max_iter, 642 min_iter=min_iter) 643 644 self.mu_min = max(consts.FLOAT_EPSILON, float(mu_min)) 645 self.tau = max(consts.TOLERANCE, 646 min(float(tau), 1.0 - consts.TOLERANCE)) 647 self.exponent = max(1.001, min(float(exponent), 2.0)) 648 self.eps = max(consts.TOLERANCE, float(eps)) 649 self.simulation = bool(simulation) 650 651 self._harmonic = None

652

653 - def _harmonic_number_approx(self):

654 655 if self._harmonic is None: 656 x = [1.001, 1.005, 1.01, 1.025, 1.05, 1.075, 1.1, 1.2, 1.3, 1.4, 657 1.5, 1.6, 1.7, 1.8, 1.9, 2.0] 658 y = [1000.58, 200.578, 100.578, 40.579, 20.5808, 13.916, 10.5844, 659 5.59158, 3.93195, 3.10555, 2.61238, 2.28577, 2.05429, 1.88223, 660 1.74975, 1.64493] 661 662 f = interp1(x, y) 663 664 self._harmonic = f(self.exponent) 665 666 return self._harmonic

667

668 - def _approximate_eps(self, function, beta0):

669 old_mu = function.set_mu(self.mu_min) 670 671 step = function.step(beta0) 672 D1 = maths.norm(function.prox(-step * function.grad(beta0), 673 step, 674 # Arbitrary eps ... 675 eps=np.sqrt(consts.TOLERANCE), 676 max_iter=self.max_iter)) 677 function.set_mu(old_mu) 678 679 return (2.0 / step) * D1 * self._harmonic_number_approx()

680 681 @bases.force_reset 682 @bases.check_compatibility

683 - def run(self, function, beta):

684 685 # Copy the allowed info keys for FISTA. 686 fista_info = list() 687 for nfo in self.info_copy(): 688 if nfo in FISTA.INFO_PROVIDED: 689 fista_info.append(nfo) 690 691 # Create the inner algorithm. 692 algorithm = FISTA(info=fista_info, eps=self.eps, 693 max_iter=self.max_iter, min_iter=self.min_iter) 694 695 # Not ok until the end. 696 if self.info_requested(Info.ok): 697 self.info_set(Info.ok, False) 698 699 # Time the init computation. 700 if self.info_requested(Info.time): 701 init_time = utils.time() 702 703 # Estimate the initial precision, eps, and the smoothing parameter mu. 704 gM = function.eps_max(1.0) # gamma * M 705 if maths.norm(beta) > 0.0: 706 mu = function.estimate_mu(beta) 707 eps = mu * gM 708 else: 709 eps = self._approximate_eps(function, beta) 710 mu = eps / gM 711 712 function.set_mu(mu) 713 714 # Initialise info variables. Info variables have the suffix "_". 715 if self.info_requested(Info.time): 716 t_ = [] 717 init_time = utils.time() - init_time 718 if self.info_requested(Info.fvalue) \ 719 or self.info_requested(Info.func_val): 720 f_ = [] 721 if self.info_requested(Info.converged): 722 self.info_set(Info.converged, False) 723 if self.info_requested(Info.mu): 724 mu_ = [] 725 726 i = 0 # Iteration counter. 727 while True: 728 converged = False 729 730 # Give current parameters to the algorithm. 731 algorithm.set_params(eps=eps, 732 max_iter=self.max_iter - self.num_iter) 733 # Run FISTA. 734 beta_new = algorithm.run(function, beta) 735 736 # Update global iteration count. 737 self.num_iter += algorithm.num_iter 738 739 # Get info from algorithm. 740 if Info.time in algorithm.info and \ 741 self.info_requested(Info.time): 742 t_ += algorithm.info_get(Info.time) 743 if i == 0: # Add init time to first iteration. 744 t_[0] += init_time 745 if Info.func_val in algorithm.info \ 746 and self.info_requested(Info.func_val): 747 f_ += algorithm.info_get(Info.func_val) 748 elif Info.fvalue in algorithm.info \ 749 and self.info_requested(Info.fvalue): 750 f_ += algorithm.info_get(Info.fvalue) 751 if self.info_requested(Info.mu): 752 mu_ += [mu] * algorithm.num_iter 753 754 # Unless this is a simulation, you want the algorithm to stop when 755 # it has converged. 756 if not self.simulation: 757 758 # Stopping criterion. 759 step = function.step(beta_new) 760 if maths.norm(beta_new - beta) < step * self.eps: 761 762 if self.info_requested(Info.converged): 763 self.info_set(Info.converged, True) 764 765 converged = True 766 767 beta = beta_new 768 769 # All combined stopping criteria. 770 if (converged or self.num_iter >= self.max_iter) \ 771 and self.num_iter >= self.min_iter: 772 break 773 774 # Update the precision eps. 775 eps = self.tau * eps 776 # Compute and update mu. 777 mu = max(self.mu_min, eps / gM) 778 function.set_mu(mu) 779 780 i = i + 1 781 782 if self.info_requested(Info.num_iter): 783 self.info_set(Info.num_iter, self.num_iter) 784 if self.info_requested(Info.continuations): 785 self.info_set(Info.continuations, i + 1) 786 if self.info_requested(Info.time): 787 self.info_set(Info.time, t_) 788 if self.info_requested(Info.fvalue): 789 self.info_set(Info.fvalue, f_) 790 if self.info_requested(Info.mu): 791 self.info_set(Info.mu, mu_) 792 if self.info_requested(Info.ok): 793 self.info_set(Info.ok, True) 794 795 return beta

796

797 798 #class ProjectionADMM(bases.ExplicitAlgorithm): 799 # """ The Alternating direction method of multipliers, where the functions 800 # have projection operators onto the corresponding convex sets. 801 # """ 802 # INTERFACES = [properties.Function, 803 # properties.ProjectionOperator] 804 # 805 # def __init__(self, output=False, 806 # eps=consts.TOLERANCE, 807 # max_iter=consts.MAX_ITER, min_iter=1): 808 # 809 # self.output = output 810 # self.eps = eps 811 # self.max_iter = max_iter 812 # self.min_iter = min_iter 813 # 814 # def run(self, function, x): 815 # """Finds the projection onto the intersection of two sets. 816 # 817 # Parameters 818 # ---------- 819 # function : List or tuple with two Functions. The two functions. 820 # 821 # x : Numpy array. The point that we wish to project. 822 # """ 823 # self.check_compatibility(function[0], self.INTERFACES) 824 # self.check_compatibility(function[1], self.INTERFACES) 825 # 826 # z = x 827 # u = np.zeros(x.shape) 828 # for i in xrange(1, self.max_iter + 1): 829 # x = function[0].proj(z - u) 830 # z = function[1].proj(x + u) 831 # u = u + x - z 832 # 833 # if maths.norm(z - x) / maths.norm(z) < self.eps \ 834 # and i >= self.min_iter: 835 # break 836 # 837 # return z 838 839 840 -class ADMM(bases.ExplicitAlgorithm, 841 bases.IterativeAlgorithm, 842 bases.InformationAlgorithm):

843 """The alternating direction method of multipliers (ADMM). Computes the 844 minimum of the sum of two functions with associated proximal or projection 845 operators. Solves problems on the form 846 847 min. f(x, y) = g(x) + h(y) 848 s.t. y = x 849 850 The functions have associated proximal or projection operators. 851 852 Parameters 853 ---------- 854 rho : Positive float. The penalty parameter. 855 856 mu : Float, greater than 1. The factor within which the primal and dual 857 variables should be kept. Set to less than or equal to 1 if you 858 don't want to update the penalty parameter rho dynamically. 859 860 tau : Float, greater than 1. Increase rho by a factor tau. 861 862 info : List or tuple of utils.consts.Info. What, if any, extra run 863 information should be stored. Default is an empty list, which means 864 that no run information is computed nor returned. 865 866 eps : Positive float. Tolerance for the stopping criterion. 867 868 max_iter : Non-negative integer. Maximum allowed number of iterations. 869 870 min_iter : Non-negative integer less than or equal to max_iter. Minimum 871 number of iterations that must be performed. Default is 1. 872 """ 873 INTERFACES = [properties.SplittableFunction, 874 properties.AugmentedProximalOperator, 875 properties.OR(properties.ProximalOperator, 876 properties.ProjectionOperator)] 877 878 INFO_PROVIDED = [Info.ok, 879 Info.num_iter, 880 Info.time, 881 Info.fvalue, 882 Info.converged] 883

884 - def __init__(self, rho=1.0, mu=10.0, tau=2.0, 885 info=[], 886 eps=consts.TOLERANCE, max_iter=consts.MAX_ITER, min_iter=1, 887 simulation=False):

888 # TODO: Investigate what is a good default value here! 889 890 super(ADMM, self).__init__(info=info, 891 max_iter=max_iter, 892 min_iter=min_iter) 893 894 self.rho = max(consts.FLOAT_EPSILON, float(rho)) 895 self.mu = max(1.0, float(mu)) 896 self.tau = max(1.0, float(tau)) 897 898 self.eps = max(consts.FLOAT_EPSILON, float(eps)) 899 900 self.simulation = bool(simulation)

901 902 @bases.force_reset 903 @bases.check_compatibility

904 - def run(self, functions, xy):

905 """Finds the minimum of two functions with associated proximal 906 operators. 907 908 Parameters 909 ---------- 910 functions : List or tuple with two Functions or a SplittableFunction. 911 The two functions. 912 913 xy : List or tuple with two elements, numpy arrays. The starting points 914 for the minimisation. 915 """ 916 if self.info_requested(Info.ok): 917 self.info_set(Info.ok, False) 918 919 if self.info_requested(Info.time): 920 t = [] 921 if self.info_requested(Info.fvalue): 922 f = [] 923 if self.info_requested(Info.converged): 924 self.info_set(Info.converged, False) 925 926 funcs = [functions.g, functions.h] 927 928 x_new = xy[0] 929 y_new = xy[1] 930 z_new = x_new.copy() 931 u_new = y_new.copy() 932 for i in xrange(1, self.max_iter + 1): 933 934 if self.info_requested(Info.time): 935 tm = utils.time_cpu() 936 937 x_old = x_new 938 z_old = z_new 939 u_old = u_new 940 941 if isinstance(funcs[0], properties.ProximalOperator): 942 x_new = funcs[0].prox(z_old - u_old) 943 else: 944 x_new = funcs[0].proj(z_old - u_old) 945 946 y_new = x_new # TODO: Allow a linear operator here. 947 948 if isinstance(funcs[1], properties.ProximalOperator): 949 z_new = funcs[1].prox(y_new + u_old) 950 else: 951 z_new = funcs[1].proj(y_new + u_old) 952 953 # The order here is important! Do not change! 954 u_new = (y_new - z_new) + u_old 955 956 if self.info_requested(Info.time): 957 t.append(utils.time_cpu() - tm) 958 if self.info_requested(Info.fvalue): 959 fval = funcs[0].f(z_new) + funcs[1].f(z_new) 960 f.append(fval) 961 962 if not self.simulation: 963 if i == 1: 964 if maths.norm(x_new - x_old) < self.eps \ 965 and i >= self.min_iter: 966 # print "Stopping criterion kicked in!" 967 if self.info_requested(Info.converged): 968 self.info_set(Info.converged, True) 969 970 break 971 else: 972 if maths.norm(x_new - x_old) / maths.norm(x_old) < self.eps \ 973 and i >= self.min_iter: 974 # print "Stopping criterion kicked in!" 975 if self.info_requested(Info.converged): 976 self.info_set(Info.converged, True) 977 978 break 979 980 # Update the penalty parameter, rho, dynamically. 981 if self.mu > 1.0: 982 r = x_new - z_new 983 s = (z_new - z_old) * -self.rho 984 norm_r = maths.norm(r) 985 norm_s = maths.norm(s) 986 # print "norm(r): ", norm_r, ", norm(s): ", norm_s, ", rho:", \ 987 # self.rho 988 989 if norm_r > self.mu * norm_s: 990 self.rho *= self.tau 991 u_new *= 1.0 / self.tau # Rescale dual variable. 992 elif norm_s > self.mu * norm_r: 993 self.rho /= self.tau 994 u_new *= self.tau # Rescale dual variable. 995 996 # Update the penalty parameter in the functions. 997 functions.set_rho(self.rho) 998 999 self.num_iter = i 1000 1001 if self.info_requested(Info.num_iter): 1002 self.info_set(Info.num_iter, i) 1003 if self.info_requested(Info.time): 1004 self.info_set(Info.time, t) 1005 if self.info_requested(Info.fvalue): 1006 self.info_set(Info.fvalue, f) 1007 if self.info_requested(Info.ok): 1008 self.info_set(Info.ok, True) 1009 1010 return z_new

1011

1012 1013 -class DykstrasProximalAlgorithm(bases.ExplicitAlgorithm):

1014 """Dykstra's proximal algorithm. Computes the minimum of the sum of two 1015 proximal operators. 1016 1017 The functions have proximal operators (ProjectionOperator.prox). 1018 """ 1019 INTERFACES = [properties.Function, 1020 properties.ProximalOperator] 1021

1022 - def __init__(self, eps=consts.TOLERANCE, 1023 max_iter=1000, min_iter=1):

1024 # TODO: Investigate what is a good default value here! 1025 1026 self.eps = eps 1027 self.max_iter = max_iter 1028 self.min_iter = min_iter

1029

1030 - def run(self, function, x):

1031 """Finds the proximal operator of the sum of two proximal operators. 1032 1033 Parameters 1034 ---------- 1035 function : List or tuple with two Functions. The two functions. 1036 1037 x : Numpy array. The point that we wish to compute the proximal 1038 operator of. 1039 """ 1040 self.check_compatibility(function[0], self.INTERFACES) 1041 self.check_compatibility(function[1], self.INTERFACES) 1042 1043 x_new = x 1044 p_new = np.zeros(x.shape) 1045 q_new = np.zeros(x.shape) 1046 for i in xrange(1, self.max_iter + 1): 1047 1048 x_old = x_new 1049 p_old = p_new 1050 q_old = q_new 1051 1052 y_old = function[0].prox(x_old + p_old) 1053 p_new = x_old + p_old - y_old 1054 x_new = function[1].prox(y_old + q_old) 1055 q_new = y_old + q_old - x_new 1056 1057 if maths.norm(x_new - x_old) / maths.norm(x_old) < self.eps \ 1058 and i >= self.min_iter: 1059 break 1060 1061 return x_new

1062

1063 1064 -class DykstrasProjectionAlgorithm(bases.ExplicitAlgorithm):

1065 """Dykstra's projection algorithm. Computes the projection onto the 1066 intersection of two convex sets. 1067 1068 The functions have projection operators (ProjectionOperator.proj) onto the 1069 corresponding convex sets. 1070 """ 1071 INTERFACES = [properties.Function, 1072 properties.ProjectionOperator] 1073

1074 - def __init__(self, eps=consts.TOLERANCE, 1075 max_iter=1000, min_iter=1):

1076 # TODO: Investigate what is a good default value here! 1077 1078 self.eps = eps 1079 self.max_iter = max_iter 1080 self.min_iter = min_iter

1081

1082 - def run(self, function, x):

1083 """Finds the projection onto the intersection of two sets. 1084 1085 Parameters 1086 ---------- 1087 function : List or tuple with two Functions. The two functions. 1088 1089 x : Numpy array. The point that we wish to project. 1090 """ 1091 self.check_compatibility(function[0], self.INTERFACES) 1092 self.check_compatibility(function[1], self.INTERFACES) 1093 1094 x_new = x 1095 p_new = np.zeros(x.shape) 1096 q_new = np.zeros(x.shape) 1097 for i in xrange(1, self.max_iter + 1): 1098 1099 x_old = x_new 1100 p_old = p_new 1101 q_old = q_new 1102 1103 y_old = function[0].proj(x_old + p_old) 1104 p_new = x_old + p_old - y_old 1105 x_new = function[1].proj(y_old + q_old) 1106 q_new = y_old + q_old - x_new 1107 1108 if maths.norm(x_new - x_old) / maths.norm(x_old) < self.eps \ 1109 and i >= self.min_iter: 1110 break 1111 1112 return x_new

1113

1114 1115 -class ParallelDykstrasProjectionAlgorithm(bases.ExplicitAlgorithm):

1116 """Dykstra's projection algorithm for two or more functions. Computes the 1117 projection onto the intersection of two or more convex sets. 1118 1119 The functions have projection operators (ProjectionOperator.proj) onto the 1120 respective convex sets. 1121 """ 1122 INTERFACES = [properties.Function, 1123 properties.ProjectionOperator] 1124

1125 - def __init__(self, eps=consts.TOLERANCE, 1126 max_iter=100, min_iter=1):

1127 # TODO: Investigate what is a good default value here! 1128 1129 self.eps = eps 1130 self.max_iter = max_iter 1131 self.min_iter = min_iter

1132

1133 - def run(self, functions, x, weights=None):

1134 """Finds the projection onto the intersection of two sets. 1135 1136 Parameters 1137 ---------- 1138 functions : List or tuple with two or more elements. The functions. 1139 1140 x : Numpy array. The point that we wish to project. 1141 1142 weights : List or tuple with floats. Weights for the functions. 1143 Default is that they all have the same weight. The elements of 1144 the list or tuple must sum to 1. 1145 """ 1146 for f in functions: 1147 self.check_compatibility(f, self.INTERFACES) 1148 1149 num = len(functions) 1150 1151 if weights is None: 1152 weights = [1.0 / float(num)] * num 1153 1154 x_new = x_old = x 1155 p = [0.0] * len(functions) 1156 z = [0.0] * len(functions) 1157 for i in xrange(num): 1158 z[i] = np.copy(x) 1159 1160 for i in xrange(1, self.max_iter + 1): 1161 1162 for i in xrange(num): 1163 p[i] = functions[i].proj(z[i]) 1164 1165 # TODO: Does the weights really matter when the function is the 1166 # indicator function? 1167 x_old = x_new 1168 x_new = np.zeros(x_old.shape) 1169 for i in xrange(num): 1170 x_new += weights[i] * p[i] 1171 1172 for i in xrange(num): 1173 z[i] = x + z[i] - p[i] 1174 1175 if maths.norm(x_new - x_old) / maths.norm(x_old) < self.eps \ 1176 and i >= self.min_iter: 1177 break 1178 1179 return x_new

1180

1181 1182 -class ParallelDykstrasProximalAlgorithm(bases.ExplicitAlgorithm):

1183 """Dykstra's projection algorithm for two or more functions. Computes the 1184 proximal operator of a sum of functions. These functions may be indicator 1185 functions for convex sets (ProjectionOperator) or ProximalOperators. 1186 1187 If all functions are ProjectionOperators, this algorithm finds the 1188 projection onto the intersection of the convex sets. 1189 1190 The functions have projection operators (ProjectionOperator.proj) onto the 1191 respective convex sets or proximal operators (ProximalOperator.prox). 1192 """ 1193 INTERFACES = [properties.Function, 1194 properties.OR(properties.ProjectionOperator, 1195 properties.ProximalOperator)] 1196

1197 - def __init__(self, eps=consts.TOLERANCE, 1198 max_iter=100, min_iter=1):

1199 # TODO: Investigate what is a good default value here! 1200 1201 self.eps = eps 1202 self.max_iter = max_iter 1203 self.min_iter = min_iter

1204

1205 - def run(self, x, prox=[], proj=[], factor=1.0, weights=None):

1206 """Finds the projection onto the intersection of two sets. 1207 1208 Parameters 1209 ---------- 1210 prox : List or tuple with two or more elements. The functions that 1211 are ProximalOperators. Either prox or proj must be non-empty. 1212 1213 proj : List or tuple with two or more elements. The functions that 1214 are ProjectionOperators. Either proj or prox must be non-empty. 1215 1216 factor : Positive float. A factor by which the Lagrange multiplier is 1217 scaled. This is usually the step size. 1218 1219 x : Numpy array. The point that we wish to project. 1220 1221 weights : List or tuple with floats. Weights for the functions. 1222 Default is that they all have the same weight. The elements of 1223 the list or tuple must sum to 1. 1224 """ 1225 for f in prox: 1226 self.check_compatibility(f, self.INTERFACES) 1227 1228 for f in proj: 1229 self.check_compatibility(f, self.INTERFACES) 1230 1231 num_prox = len(prox) 1232 num_proj = len(proj) 1233 1234 if weights is None: 1235 weights = [1. / float(num_prox + num_proj)] * (num_prox + num_proj) 1236 1237 x_new = x_old = x 1238 p = [0.0] * (num_prox + num_proj) 1239 z = [0.0] * (num_prox + num_proj) 1240 for i in xrange(num_prox + num_proj): 1241 z[i] = np.copy(x) 1242 1243 for i in xrange(1, self.max_iter + 1): 1244 1245 for i in xrange(num_prox): 1246 p[i] = prox[i].prox(z[i], factor) 1247 for i in xrange(num_proj): 1248 p[num_prox + i] = proj[i].proj(z[num_prox + i]) 1249 1250 x_old = x_new 1251 x_new = np.zeros(x_old.shape) 1252 for i in xrange(num_prox + num_proj): 1253 x_new += weights[i] * p[i] 1254 1255 if maths.norm(x_new - x_old) / maths.norm(x_old) < self.eps \ 1256 and i >= self.min_iter: 1257 1258 all_feasible = True 1259 for i in xrange(num_proj): 1260 if proj[i].f(p[num_prox + i]) > 0.0: 1261 all_feasible = False 1262 1263 if all_feasible: 1264 break 1265 1266 for i in xrange(num_prox + num_proj): 1267 z[i] = x_new + z[i] - p[i] 1268 1269 return x_new

1270 1271 if __name__ == "__main__": 1272 import doctest 1273 doctest.testmod() 1274

Source Code for Module parsimony.algorithms.proximal