1
2 """Estimators encapsulates an algorithm with (possibly) a corresponding loss
3 function and penalties.
4
5 Created on Sat Nov 2 15:19:17 2013
6
7 Copyright (c) 2013-2014, CEA/DSV/I2BM/Neurospin. All rights reserved.
8
9 @author: Tommy Löfstedt, Edouard Duchesnay
10 @email: lofstedt.tommy@gmail.com, edouard.duchesnay@cea.fr
11 @license: BSD 3-clause.
12 """
13 import abc
14 import warnings
15
16 import numpy as np
17
18 import parsimony.utils.consts as consts
19 import parsimony.utils.maths as maths
20 import parsimony.functions as functions
21 import parsimony.functions.losses as losses
22 import parsimony.functions.multiblock.losses as mb_losses
23 import parsimony.functions.penalties as penalties
24 import parsimony.utils.start_vectors as start_vectors
25 import parsimony.utils.linalgs as linalgs
26 import parsimony.algorithms.bases as bases
27 import parsimony.algorithms.cluster as cluster
28 import parsimony.algorithms.gradient as gradient
29 import parsimony.algorithms.proximal as proximal
30 import parsimony.algorithms.primaldual as primaldual
31 import parsimony.algorithms.nipals as nipals
32 import parsimony.algorithms.deflation as deflation
33 from parsimony.utils import check_arrays
34 from parsimony.utils import class_weight_to_sample_weight, check_labels
35
36 __all__ = ["BaseEstimator",
37 "RegressionEstimator", "LogisticRegressionEstimator",
38
39 "LinearRegression", "RidgeRegression", "Lasso", "ElasticNet",
40
41 "LinearRegressionL1L2TV",
42 "LinearRegressionL1L2GL",
43
44 "LogisticRegression",
45 "ElasticNetLogisticRegression",
46 "LogisticRegressionL1L2TV",
47 "LogisticRegressionL1L2GL",
48
49 "LinearRegressionL2SmoothedL1TV"]
53 """Base class for estimators.
54
55 Parameters
56 ----------
57 algorithm : BaseAlgorithm. The algorithm that will be used.
58 """
59 __metaclass__ = abc.ABCMeta
60
62
63 self.algorithm = algorithm
64
66 """Set the given input parameters in the estimator.
67 """
68 for k in kwargs:
69 self.__setattr__(k, kwargs[k])
70
71 @abc.abstractmethod
73 """Return a dictionary containing the estimator's own input parameters.
74 """
75 raise NotImplementedError('Abstract method "get_params" must be '
76 'specialised!')
77
79 """Fit the estimator to the data.
80 """
81 raise NotImplementedError('Abstract method "fit" must be '
82 'specialised!')
83
84 @abc.abstractmethod
86 """Perform prediction using the fitted parameters.
87 """
88 raise NotImplementedError('Abstract method "predict" must be '
89 'specialised!')
90
91
92
94 """Returns the estimator's fitted parameters, e.g. the regression
95 coefficients.
96
97 What is returned depends on the estimator. See the estimator's
98 documentation.
99 """
100 raise NotImplementedError('Abstract method "parameters" must be '
101 'specialised!')
102
103
104 @abc.abstractmethod
106 raise NotImplementedError('Abstract method "score" must be '
107 'specialised!')
108
109
111 """If an InformationAlgorithm, returns the information dictionary.
112 """
113 if not isinstance(self.algorithm, bases.InformationAlgorithm):
114 raise AttributeError("Algorithm is not an " \
115 "InformationAlgorithm.")
116
117 return self.algorithm.info_get()
118
121 """Base estimator for regression estimation.
122
123 Parameters
124 ----------
125 algorithm : ExplicitAlgorithm. The algorithm that will be applied.
126
127 start_vector : BaseStartVector. Generates the start vector that will be
128 used.
129 """
130 __metaclass__ = abc.ABCMeta
131
138
139 @abc.abstractmethod
140 def fit(self, X, y):
141 """Fit the estimator to the data.
142 """
143 raise NotImplementedError('Abstract method "fit" must be '
144 'specialised!')
145
147 """Perform prediction using the fitted parameters.
148 """
149 return np.dot(check_arrays(X), self.beta)
150
151 @abc.abstractmethod
153 """Return the score of the estimator.
154
155 The score is a measure of "goodness" of the fit to the data.
156 """
157
158
159
160 raise NotImplementedError('Abstract method "score" must be '
161 'specialised!')
162
165 """Base estimator for logistic regression estimation
166
167 Parameters
168 ----------
169 algorithm : ExplicitAlgorithm. The algorithm that will be applied.
170
171 start_vector : BaseStartVector. Generates the start vector that will be used.
172
173 class_weight : {dict, "auto"}, optional. Set the parameter weight of
174 sample belonging to class i to class_weight[i]. If not given, all
175 classes are supposed to have weight one. The "auto" mode uses the
176 values of y to automatically adjust weights inversely proportional
177 to class frequencies.
178 """
179 __metaclass__ = abc.ABCMeta
180
189
190 @abc.abstractmethod
191 def fit(self, X, y):
192 """Fit the model to the data.
193 """
194 raise NotImplementedError('Abstract method "fit" must be '
195 'specialised!')
196
198 """Return a predicted y corresponding to the X given and the beta
199 previously determined.
200 """
201 X = check_arrays(X)
202 prob = self.predict_probability(X)
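# Predict class 1 unless the estimated probability of class 1 is below 0.5.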
203 y = np.ones((X.shape[0], 1))
204 y[prob < 0.5] = 0.0
205
206 return y
207
209 X = check_arrays(X)
210 logit = np.dot(X, self.beta)
211
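# The class-1 probability is the logistic sigmoid of the linear predictor:
# p = 1 / (1 + exp(-X * beta)).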
212 prob = np.reciprocal(1.0 + np.exp(-logit))
213
214 return prob
215
217 """Rate of correct classification.
218 """
219 yhat = self.predict(X)
220 rate = np.mean(y == yhat)
221
222 return rate
223
226 """Linear regression:
227
228 f(beta, X, y) = (1 / (2 * n)) * ||Xbeta - y||²_2,
229
230 where ||.||²_2 is the squared L2-norm.
231
232 Parameters
233 ----------
234 algorithm : ExplicitAlgorithm. The algorithm that should be used.
235 Should be one of:
236 1. GradientDescent(...)
237
238 Default is GradientDescent(...).
239
240 algorithm_params : A dict. The dictionary algorithm_params contains
241 parameters that should be set in the algorithm. Passing
242 algorithm=GradientDescent(**params) is equivalent to passing
243 algorithm=GradientDescent() and algorithm_params=params. Default
244 is an empty dictionary.
245
246 start_vector : BaseStartVector. Generates the start vector that will be
247 used.
248
249 mean : Boolean. Whether to compute the squared loss or the mean squared
250 loss. Default is True, the mean squared loss.
251
252 Examples
253 --------
254 >>> import numpy as np
255 >>> import parsimony.estimators as estimators
256 >>> import parsimony.algorithms.gradient as gradient
257 >>>
258 >>> np.random.seed(42)
259 >>>
260 >>> n = 10
261 >>> p = 16
262 >>> X = np.random.rand(n, p)
263 >>> y = np.random.rand(n, 1)
264 >>> lr = estimators.LinearRegression(algorithm=gradient.GradientDescent(),
265 ... algorithm_params=dict(max_iter=1000),
266 ... mean=False)
267 >>> error = lr.fit(X, y).score(X, y)
268 >>> print "error = ", error
269 error = 0.0116466703591
270 """
284
286 """Return a dictionary containing the estimator's parameters
287 """
288 return {"mean": self.mean}
289
290 def fit(self, X, y, beta=None):
307
309 """Returns the (mean) squared error of the estimator.
310 """
311 X, y = check_arrays(X, y)
312 n, p = X.shape
313 y_hat = np.dot(X, self.beta)
314 err = np.sum((y_hat - y) ** 2.0)
315 if self.mean:
316 err /= float(n)
317
318 return err
319
322 """Linear regression with an L2 penalty. Represents the function:
323
324 f(beta, X, y) = (1 / (2 * n)) * ||X * beta - y||²_2
325 + (l / 2) * ||beta||²_2,
326
327 where ||.||²_2 is the squared L2-norm.
328
329 Parameters
330 ----------
331 l : Non-negative float. The L2 regularisation parameter.
332
333 algorithm : ExplicitAlgorithm. The algorithm that should be applied.
334 Should be one of:
335 1. FISTA(...)
336 2. ISTA(...)
337
338 Default is FISTA(...).
339
340 algorithm_params : A dict. The dictionary algorithm_params contains
341 parameters that should be set in the algorithm. Passing
342 algorithm=FISTA(**params) is equivalent to passing
343 algorithm=FISTA() and algorithm_params=params. Default is
344 an empty dictionary.
345
346 start_vector : BaseStartVector. Generates the start vector that will be
347 used.
348
349 penalty_start : Non-negative integer. The number of columns, variables
350 etc., to be exempt from penalisation. Equivalently, the first
351 index to be penalised. Default is 0, all columns are included.
352
353 mean : Boolean. Whether to compute the squared loss or the mean squared
354 loss. Default is True, the mean squared loss.
355
356 Examples
357 --------
358 >>> import numpy as np
359 >>> import parsimony.estimators as estimators
360 >>> import parsimony.algorithms.proximal as proximal
361 >>> n = 10
362 >>> p = 16
363 >>>
364 >>> np.random.seed(42)
365 >>> X = np.random.rand(n, p)
366 >>> y = np.random.rand(n, 1)
367 >>> l = 0.618 # Regularisation coefficient
368 >>> rr = estimators.RidgeRegression(l,
369 ... algorithm=proximal.FISTA(),
370 ... algorithm_params=dict(max_iter=1000),
371 ... mean=False)
372 >>> error = rr.fit(X, y).score(X, y)
373 >>> print "error = ", error
374 error = 0.377679437659
375 """
379
380 if algorithm is None:
381 algorithm = proximal.FISTA(**algorithm_params)
382 else:
383 algorithm.set_params(**algorithm_params)
384
385 super(RidgeRegression, self).__init__(algorithm=algorithm,
386 start_vector=start_vector)
387
388 self.l = float(l)
389
390 self.penalty_start = int(penalty_start)
391 self.mean = bool(mean)
392
394 """Return a dictionary containing the estimator's parameters.
395 """
396 return {"l": self.l,
397 "penalty_start": self.penalty_start, "mean": self.mean}
398
399 def fit(self, X, y, beta=None):
419
421 """Returns the (mean) squared error of the estimator.
422 """
423 X, y = check_arrays(X, y)
424 n, p = X.shape
425 y_hat = np.dot(X, self.beta)
426 err = np.sum((y_hat - y) ** 2.0)
427 if self.mean:
428 err /= float(n)
429
430 return err
431
432
433 class Lasso(RegressionEstimator):
434 """Linear regression with an L1 penalty:
435
436 f(beta, X, y) = (1 / (2 * n)) * ||Xbeta - y||²_2 + l * ||beta||_1,
437
438 where ||.||_1 is the L1-norm.
439
440 Parameters
441 ----------
442 l : Non-negative float. The Lagrange multiplier, or regularisation
443 constant, of the function.
444
445 algorithm : ExplicitAlgorithm. The algorithm that should be applied.
446 Should be one of:
447 1. FISTA(...)
448 2. ISTA(...)
449
450 Default is FISTA(...).
451
452 algorithm_params : A dict. The dictionary algorithm_params contains
453 parameters that should be set in the algorithm. Passing
454 algorithm=FISTA(**params) is equivalent to passing
455 algorithm=FISTA() and algorithm_params=params. Default is
456 an empty dictionary.
457
458 start_vector : BaseStartVector. Generates the start vector that will be
459 used.
460
461 penalty_start : Non-negative integer. The number of columns, variables
462 etc., to be exempt from penalisation. Equivalently, the first
463 index to be penalised. Default is 0, all columns are included.
464
465 mean : Boolean. Whether to compute the squared loss or the mean squared
466 loss. Default is True, the mean squared loss.
467
468 Examples
469 --------
470 >>> import numpy as np
471 >>> import parsimony.estimators as estimators
472 >>> import parsimony.algorithms.proximal as proximal
473 >>> n = 10
474 >>> p = 16
475 >>>
476 >>> np.random.seed(42)
477 >>> X = np.random.rand(n, p)
478 >>> y = np.random.rand(n, 1)
479 >>> l1 = 0.1 # L1 coefficient
480 >>> lasso = estimators.Lasso(l1,
481 ... algorithm=proximal.FISTA(),
482 ... algorithm_params=dict(max_iter=1000),
483 ... mean=False)
484 >>> error = lasso.fit(X, y).score(X, y)
485 >>> print "error = ", error
486 error = 0.395494642796
487 """
493
494 if algorithm is None:
495 algorithm = proximal.FISTA(**algorithm_params)
496 else:
497 algorithm.set_params(**algorithm_params)
498
499 super(Lasso, self).__init__(algorithm=algorithm,
500 start_vector=start_vector)
501
502 self.l = float(l)
503
504 self.penalty_start = int(penalty_start)
505 self.mean = bool(mean)
506
508 """Return a dictionary containing the estimator's parameters
509 """
510 return {"l": self.l,
511 "penalty_start": self.penalty_start,
512 "mean": self.mean}
513
514 def fit(self, X, y, beta=None):
534
536 """Returns the (mean) squared error of the estimator.
537 """
538 X, y = check_arrays(X, y)
539 n, p = X.shape
540 y_hat = np.dot(X, self.beta)
541 err = np.sum((y_hat - y) ** 2.0)
542 if self.mean:
543 err /= float(n)
544
545 return err
546
549 """Linear regression with L1 and L2 penalties. Represents the function:
550
551 f(beta, X, y) = (1 / (2 * n)) * ||X * beta - y||²_2
552 + alpha * l * ||beta||_1
553 + alpha * ((1.0 - l) / 2) * ||beta||²_2,
554
555 where ||.||²_2 is the squared L2-norm and ||.||_1 is the L1-norm.
556
557 Parameters
558 ----------
559 l : Non-negative float. The mixing parameter between the L1 and L2
560 penalties: the L1 term is weighted by alpha * l and the squared L2
term by alpha * (1 - l) / 2.
561
562 alpha : Non-negative float. Scaling parameter of the regularisation.
563 Default is 1.
564
565 algorithm : ExplicitAlgorithm. The algorithm that should be applied.
566 Should be one of:
567 1. FISTA(...)
568 2. ISTA(...)
569
570 Default is FISTA(...).
571
572 algorithm_params : A dict. The dictionary algorithm_params contains
573 parameters that should be set in the algorithm. Passing
574 algorithm=FISTA(**params) is equivalent to passing
575 algorithm=FISTA() and algorithm_params=params. Default is
576 an empty dictionary.
577
578 start_vector : BaseStartVector. Generates the start vector that will be
579 used.
580
581 penalty_start : Non-negative integer. The number of columns, variables
582 etc., to be exempt from penalisation. Equivalently, the first
583 index to be penalised. Default is 0, all columns are included.
584
585 mean : Boolean. Whether to compute the squared loss or the mean squared
586 loss. Default is True, the mean squared loss.
587
588 Examples
589 --------
590 >>> import numpy as np
591 >>> import parsimony.estimators as estimators
592 >>> import parsimony.algorithms.proximal as proximal
593 >>> n = 10
594 >>> p = 16
595 >>>
596 >>> np.random.seed(42)
597 >>> X = np.random.rand(n, p)
598 >>> y = np.random.rand(n, 1)
599 >>> l = 0.1 # Regularisation coefficient
600 >>> en = estimators.ElasticNet(l,
601 ... algorithm=proximal.FISTA(),
602 ... algorithm_params=dict(max_iter=1000),
603 ... mean=False)
604 >>> error = en.fit(X, y).score(X, y)
605 >>> print "error = ", round(error, 13)
606 error = 0.492096328053
607 """
611
612 if algorithm is None:
613 algorithm = proximal.FISTA(**algorithm_params)
614 else:
615 algorithm.set_params(**algorithm_params)
616
617 super(ElasticNet, self).__init__(algorithm=algorithm,
618 start_vector=start_vector)
619
620 self.l = float(l)
621 self.alpha = float(alpha)
622
623 self.penalty_start = int(penalty_start)
624 self.mean = bool(mean)
625
627 """Return a dictionary containing the estimator's parameters.
628 """
629 return {"l": self.l, "alpha": self.alpha,
630 "penalty_start": self.penalty_start, "mean": self.mean}
631
632 def fit(self, X, y, beta=None):
654
656 """Returns the (mean) squared error of the estimator.
657 """
658 X, y = check_arrays(X, y)
659 n, p = X.shape
660 y_hat = np.dot(X, self.beta)
661 err = np.sum((y_hat - y) ** 2.0)
662 if self.mean:
663 err /= float(n)
664
665 return err
666
669 """Linear regression with L1, L2 and TV penalties:
670
671 f(beta, X, y) = (1 / (2 * n)) * ||Xbeta - y||²_2
672 + l1 * ||beta||_1
673 + (l2 / 2) * ||beta||²_2
674 + tv * TV(beta)
675
676 Parameters
677 ----------
678 l1 : Non-negative float. The L1 regularisation parameter.
679
680 l2 : Non-negative float. The L2 regularisation parameter.
681
682 tv : Non-negative float. The total variation regularisation parameter.
683
684 A : Numpy or (usually) scipy.sparse array. The linear operator for the
685 smoothed total variation Nesterov function. A must be given.
686
687 mu : Non-negative float. The regularisation constant for the smoothing.
688
689 algorithm : ExplicitAlgorithm. The algorithm that should be applied.
690 Should be one of:
691 1. CONESTA(...)
692 2. StaticCONESTA(...)
693 3. FISTA(...)
694 4. ISTA(...)
695 5. ADMM(...)
696 6. NaiveCONESTA(...)
697
698 Default is CONESTA(...).
699
700 algorithm_params : A dict. The dictionary algorithm_params contains
701 parameters that should be set in the algorithm. Passing
702 algorithm=CONESTA(**params) is equivalent to passing
703 algorithm=CONESTA() and algorithm_params=params. Default is an
704 empty dictionary.
705
706 penalty_start : Non-negative integer. The number of columns, variables
707 etc., to be exempt from penalisation. Equivalently, the first
708 index to be penalised. Default is 0, all columns are included.
709
710 mean : Boolean. Whether to compute the squared loss or the mean squared
711 loss. Default is True, the mean squared loss.
712
713 rho : Positive float. Regularisation constant only used in ADMM. Default
714 is 1.0.
715
716 Examples
717 --------
718 >>> import numpy as np
719 >>> import parsimony.estimators as estimators
720 >>> import parsimony.algorithms.proximal as proximal
721 >>> import parsimony.functions.nesterov.tv as total_variation
722 >>> shape = (1, 4, 4)
723 >>> n = 10
724 >>> p = shape[0] * shape[1] * shape[2]
725 >>>
726 >>> np.random.seed(42)
727 >>> X = np.random.rand(n, p)
728 >>> y = np.random.rand(n, 1)
729 >>> l1 = 0.1 # L1 coefficient
730 >>> l2 = 0.9 # Ridge coefficient
731 >>> tv = 1.0 # TV coefficient
732 >>> A, n_compacts = total_variation.linear_operator_from_shape(shape)
733 >>> lr = estimators.LinearRegressionL1L2TV(l1, l2, tv, A,
734 ... algorithm=proximal.StaticCONESTA(max_iter=1000),
735 ... mean=False)
736 >>> res = lr.fit(X, y)
737 >>> round(lr.score(X, y), 13)
738 0.0683842576534
739 >>>
740 >>> lr = estimators.LinearRegressionL1L2TV(l1, l2, tv, A,
741 ... algorithm=proximal.CONESTA(max_iter=1000),
742 ... mean=False)
743 >>> res = lr.fit(X, y)
744 >>> round(lr.score(X, y), 13)
745 0.0683583406798
746 >>>
747 >>> lr = estimators.LinearRegressionL1L2TV(l1, l2, tv, A,
748 ... algorithm=proximal.FISTA(max_iter=1000),
749 ... mean=False)
750 >>> lr = lr.fit(X, y)
751 >>> round(lr.score(X, y), 13)
752 1.5817577127184
753 >>>
754 >>> lr = estimators.LinearRegressionL1L2TV(l1, l2, tv, A,
755 ... algorithm=proximal.ISTA(max_iter=1000),
756 ... mean=False)
757 >>> lr = lr.fit(X, y)
758 >>> round(lr.score(X, y), 14)
759 2.07583068899674
760 >>>
761 >>> import parsimony.functions.nesterov.l1tv as l1tv
762 >>> np.random.seed(1337)
763 >>> A = l1tv.linear_operator_from_shape(shape, p, penalty_start=0)
764 >>> lr = estimators.LinearRegressionL1L2TV(l1, l2, tv, A,
765 ... algorithm=proximal.ADMM(max_iter=1000),
766 ... mean=False)
767 >>> lr = lr.fit(X, y)
768 >>> round(lr.score(X, y), 13)
769 0.0623552412543
770 """
771 def __init__(self, l1, l2, tv,
772 A=None, mu=consts.TOLERANCE,
773 algorithm=None, algorithm_params=dict(),
774 penalty_start=0,
775 mean=True,
776 rho=1.0):
807
809 """Return a dictionary containing all the estimator's parameters
810 """
811 return {"l1": self.l1, "l2": self.l2, "tv": self.tv,
812 "A": self.A, "mu": self.mu,
813 "penalty_start": self.penalty_start, "mean": self.mean,
814 "rho": self.rho}
815
816 def fit(self, X, y, beta=None):
817 """Fit the estimator to the data.
818 """
819 X, y = check_arrays(X, y)
820
821 if isinstance(self.algorithm, proximal.ADMM):
822
823 function = functions.AugmentedLinearRegressionL1L2TV(X, y,
824 self.l1, self.l2, self.tv,
825 A=self.A,
826 rho=self.rho,
827 penalty_start=self.penalty_start,
828 mean=self.mean)
829
830
831
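# ADMM operates on the primal regression vector x together with an auxiliary
# variable r of length 2 * p (two stacked p-blocks); both are wrapped in a
# MultipartArray before being handed to the solver.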
832 p = X.shape[1]
833 if beta is None:
834 x = self.start_vector.get_vector(p)
835 r = self.start_vector.get_vector(2 * p)
836 else:
837 x = beta
838
839 r = np.vstack((np.zeros((p, 1)),
840 np.zeros((p, 1))))
841
842 xr = linalgs.MultipartArray([x, r])
843 beta = [xr, xr]
844
845 self.tv_function = functions.nesterov.tv.TotalVariation(self.tv,
846 A=self.A,
847 mu=self.mu,
848 penalty_start=self.penalty_start)
849
850 else:
851 function = functions.LinearRegressionL1L2TV(X, y,
852 self.l1, self.l2, self.tv,
853 A=self.A,
854 penalty_start=self.penalty_start,
855 mean=self.mean)
856
857 self.tv_function = function.tv
858
859
860
861 if beta is None:
862 beta = self.start_vector.get_vector(X.shape[1])
863
864 self.algorithm.check_compatibility(function,
865 self.algorithm.INTERFACES)
866
867 if self.mu is None:
868
869 self.mu = self.tv_function.estimate_mu(beta)
870
871 function.set_params(mu=self.mu)
872 self.beta = self.algorithm.run(function, beta)
873
874 if isinstance(self.algorithm, proximal.ADMM):
875 self.beta = self.beta.get_parts()[0]
876
877 return self
878
880 """Return the mean squared error of the estimator.
881 """
882 X, y = check_arrays(X, y)
883 n, p = X.shape
884 y_hat = np.dot(X, self.beta)
885
886 return np.sum((y_hat - y) ** 2.0) / float(n)
887
890 """Linear regression with L1, L2 and Group lasso penalties:
891
892 f(beta, X, y) = (1 / (2 * n)) * ||Xbeta - y||²_2
893 + l1 * ||beta||_1
894 + (l2 / 2) * ||beta||²_2
895 + gl * GL(beta)
896
897 Parameters
898 ----------
899 l1 : Non-negative float. The L1 regularisation parameter.
900
901 l2 : Non-negative float. The L2 regularisation parameter.
902
903 gl : Non-negative float. The group lasso regularisation parameter.
904
905 A : Numpy or (usually) scipy.sparse array. The linear operator for the
906 smoothed group lasso Nesterov function. A must be given.
907
908 mu : Non-negative float. The regularisation constant for the smoothing.
909
910 algorithm : ExplicitAlgorithm. The algorithm that should be applied.
911 Should be one of:
912 1. FISTA(...)
913 2. ISTA(...)
914 3. StaticCONESTA(...)
915
916 Default is FISTA(...).
917
918 algorithm_params : A dict. The dictionary algorithm_params contains
919 parameters that should be set in the algorithm. Passing
920 algorithm=FISTA(**params) is equivalent to passing
921 algorithm=FISTA() and algorithm_params=params. Default is an empty
922 dictionary.
923
924 penalty_start : Non-negative integer. The number of columns, variables
925 etc., to be exempt from penalisation. Equivalently, the first
926 index to be penalised. Default is 0, all columns are included.
927
928 mean : Boolean. Whether to compute the squared loss or the mean squared
929 loss. Default is True, the mean squared loss.
930
931 Examples
932 --------
933 >>> import numpy as np
934 >>> import parsimony.estimators as estimators
935 >>> import parsimony.algorithms.proximal as proximal
936 >>> import parsimony.functions.nesterov.gl as group_lasso
937 >>> n = 10
938 >>> p = 15
939 >>>
940 >>> np.random.seed(42)
941 >>> X = np.random.rand(n, p)
942 >>> y = np.random.rand(n, 1)
943 >>> l1 = 0.1 # L1 coefficient
944 >>> l2 = 0.9 # Ridge coefficient
945 >>> gl = 1.0 # GL coefficient
946 >>> groups = [range(0, 10), range(5, 15)]
947 >>> A = group_lasso.linear_operator_from_groups(p, groups, weights=None,
948 ... penalty_start=0)
949 >>> lr = estimators.LinearRegressionL1L2GL(l1, l2, gl, A,
950 ... algorithm=proximal.StaticCONESTA(),
951 ... algorithm_params=dict(max_iter=1000),
952 ... mean=False)
953 >>> res = lr.fit(X, y)
954 >>> round(lr.score(X, y), 13)
955 0.6101838224235
956 >>>
957 >>> lr = estimators.LinearRegressionL1L2GL(l1, l2, gl, A,
958 ... algorithm=proximal.CONESTA(),
959 ... algorithm_params=dict(max_iter=1000),
960 ... mean=False)
961 >>> res = lr.fit(X, y)
962 >>> round(lr.score(X, y), 11)
963 0.61139525439
964 >>>
965 >>> lr = estimators.LinearRegressionL1L2GL(l1, l2, gl, A,
966 ... algorithm=proximal.FISTA(),
967 ... algorithm_params=dict(max_iter=1000),
968 ... mean=False)
969 >>> lr = lr.fit(X, y)
970 >>> round(lr.score(X, y), 12)
971 10.7465249393
972 >>>
973 >>> lr = estimators.LinearRegressionL1L2GL(l1, l2, gl, A,
974 ... algorithm=proximal.ISTA(),
975 ... algorithm_params=dict(max_iter=1000),
976 ... mean=False)
977 >>> lr = lr.fit(X, y)
978 >>> round(lr.score(X, y), 14)
979 11.02462114246791
980 """
981 def __init__(self, l1, l2, gl,
982 A=None, mu=consts.TOLERANCE,
983 algorithm=None, algorithm_params=dict(),
984 penalty_start=0,
985 mean=True):
1013
1015 """Return a dictionary containing all the estimator's parameters.
1016 """
1017 return {"l1": self.l1, "l2": self.l2, "gl": self.gl,
1018 "A": self.A, "mu": self.mu,
1019 "penalty_start": self.penalty_start,
1020 "mean": self.mean}
1021
1022 def fit(self, X, y, beta=None):
1023 """Fit the estimator to the data
1024 """
1025 X, y = check_arrays(X, y)
1026
1027 function = functions.LinearRegressionL1L2GL(X, y,
1028 self.l1, self.l2, self.gl,
1029 A=self.A,
1030 penalty_start=self.penalty_start,
1031 mean=self.mean)
1032 self.algorithm.check_compatibility(function,
1033 self.algorithm.INTERFACES)
1034
1035
1036 if beta is None:
1037 beta = self.start_vector.get_vector(X.shape[1])
1038
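# If no smoothing constant was supplied, estimate the Nesterov smoothing
# parameter mu from the start vector.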
1039 if self.mu is None:
1040 self.mu = function.estimate_mu(beta)
1041
1042 function.set_params(mu=self.mu)
1043 self.beta = self.algorithm.run(function, beta)
1044
1045 return self
1046
1048 """Return the (mean) squared error of the estimator.
1049 """
1050 X, y = check_arrays(X, y)
1051 n, p = X.shape
1052 y_hat = np.dot(X, self.beta)
1053 err = np.sum((y_hat - y) ** 2.0)
1054 if self.mean:
1055 err /= float(n)
1056
1057 return err
1058
1186 class LogisticRegression(LogisticRegressionEstimator):
1187 """Logistic regression (re-weighted log-likelihood aka. cross-entropy):
1188
1189 f(beta) = -loglik / n_samples
1190
1191 where
1192
1193 loglik = Sum wi * (yi * log(pi) + (1 − yi) * log(1 − pi)),
1194
1195 pi = p(y=1|xi, beta) = 1 / (1 + exp(-xi'*beta)),
1196
1197 wi = weight of sample i.
1198
1199 Parameters
1200 ----------
1201 algorithm : ExplicitAlgorithm. The algorithm that should be applied.
1202 Should be one of:
1203 1. GradientDescent(...)
1204
1205 Default is GradientDescent(...).
1206
1207 algorithm_params : A dict. The dictionary algorithm_params contains
1208 parameters that should be set in the algorithm. Passing
1209 algorithm=MyAlgorithm(**params) is equivalent to passing
1210 algorithm=MyAlgorithm() and algorithm_params=params. Default is
1211 an empty dictionary.
1212
1213 class_weight : Dict, 'auto' or None. If 'auto', class weights will be
1214 given inverse proportional to the frequency of the class in
1215 the data. If a dictionary is given, keys are classes and values
1216 are corresponding class weights. If None is given, the class
1217 weights will be uniform.
1218
1219 penalty_start : Non-negative integer. The number of columns, variables
1220 etc., to be exempt from penalisation. Equivalently, the first
1221 index to be penalised. Default is 0, all columns are included.
1222
1223 mean : Boolean. Whether to compute the mean loss or not. Default is True,
1224 the mean loss.
1225
1226 Examples
1227 --------
1228 >>> import numpy as np
1229 >>> import parsimony.estimators as estimators
1230 >>> import parsimony.algorithms.gradient as gradient
1231 >>> import parsimony.functions.nesterov.tv as total_variation
1232 >>> shape = (1, 4, 4)
1233 >>> n = 10
1234 >>> p = shape[0] * shape[1] * shape[2]
1235 >>>
1236 >>> np.random.seed(42)
1237 >>> X = np.random.rand(n, p)
1238 >>> y = np.random.randint(0, 2, (n, 1))
1239 >>> lr = estimators.LogisticRegression(
1240 ... algorithm=gradient.GradientDescent(max_iter=1000),
1241 ... mean=False)
1242 >>> res = lr.fit(X, y)
1243 >>> error = lr.score(X, y)
1244 >>> print "error = ", error
1245 error = 1.0
1246 """
1247 def __init__(self, algorithm=None, algorithm_params=dict(),
1248 class_weight=None,
1249 penalty_start=0,
1250 mean=True):
1251
1252 if algorithm is None:
1253 algorithm = gradient.GradientDescent(**algorithm_params)
1254 else:
1255 algorithm.set_params(**algorithm_params)
1256
1257 super(LogisticRegression, self).__init__(algorithm=algorithm,
1258 class_weight=class_weight)
1259
1260 self.penalty_start = int(penalty_start)
1261 self.mean = bool(mean)
1262
1264 """Return a dictionary containing all the estimator's parameters.
1265 """
1266 return {"class_weight": self.class_weight,
1267 "penalty_start": self.penalty_start, "mean": self.mean}
1268
1269 def fit(self, X, y, beta=None, sample_weight=None):
1270 """Fit the estimator to the data.
1271 """
1272 X, y = check_arrays(X, check_labels(y))
1273 if sample_weight is None:
1274 sample_weight = class_weight_to_sample_weight(self.class_weight, y)
1275 y, sample_weight = check_arrays(y, sample_weight)
1276
1277
1278 function = losses.LogisticRegression(X, y,
1279 weights=sample_weight,
1280 mean=self.mean)
1281
1282 self.algorithm.check_compatibility(function,
1283 self.algorithm.INTERFACES)
1284
1285
1286 if beta is None:
1287 beta = self.start_vector.get_vector(X.shape[1])
1288
1289 self.beta = self.algorithm.run(function, beta)
1290
1291 return self
1292
1295 """Logistic regression (re-weighted log-likelihood aka. cross-entropy) with
1296 an L2 penalty:
1297
1298 f(beta) = -loglik / n_samples + (l / 2) * ||beta||²_2,
1299
1300 where
1301
1302 loglik = Sum wi * (yi * log(pi) + (1 − yi) * log(1 − pi)),
1303
1304 pi = p(y=1|xi, beta) = 1 / (1 + exp(-xi'*beta)),
1305
1306 wi = weight of sample i,
1307
1308 and ||.||²_2 is the squared L2-norm.
1309
1310 Parameters
1311 ----------
1312 l : Non-negative float. The L2 regularisation parameter.
1313
1314 algorithm : ExplicitAlgorithm. The algorithm that should be applied.
1315 Should be one of:
1316 1. GradientDescent(...)
1317
1318 Default is GradientDescent(...).
1319
1320 algorithm_params : A dict. The dictionary algorithm_params contains
1321 parameters that should be set in the algorithm. Passing
1322 algorithm=MyAlgorithm(**params) is equivalent to passing
1323 algorithm=MyAlgorithm() and algorithm_params=params. Default is
1324 an empty dictionary.
1325
1326 class_weight : Dict, 'auto' or None. If 'auto', class weights will be
1327 given inverse proportional to the frequency of the class in
1328 the data. If a dictionary is given, keys are classes and values
1329 are corresponding class weights. If None is given, the class
1330 weights will be uniform.
1331
1332 penalty_start : Non-negative integer. The number of columns, variables
1333 etc., to be exempt from penalisation. Equivalently, the first
1334 index to be penalised. Default is 0, all columns are included.
1335
1336 mean : Boolean. Whether to compute the mean loss or not. Default is True,
1337 the mean loss.
1338
1339 Examples
1340 --------
1341 >>> import numpy as np
1342 >>> import parsimony.estimators as estimators
1343 >>> import parsimony.algorithms.gradient as gradient
1344 >>> n, p = 10, 16
1345 >>>
1346 >>> np.random.seed(1337)
1347 >>> X = np.random.rand(n, p)
1348 >>> y = np.random.randint(0, 2, (n, 1))
1349 >>> l = 1.0
1350 >>> lr = estimators.RidgeLogisticRegression(l,
1351 ... algorithm=gradient.GradientDescent(max_iter=1000),
1352 ... mean=False)
1353 >>> res = lr.fit(X, y)
1354 >>> pred = lr.score(X, y)
1355 >>> print "prediction rate = %.1f" % pred
1356 prediction rate = 0.9
1357 """
1358 def __init__(self, l,
1359 algorithm=None, algorithm_params=dict(),
1360 class_weight=None,
1361 penalty_start=0,
1362 mean=True):
1363
1364 self.l = max(0.0, float(l))
1365
1366 if algorithm is None:
1367 algorithm = gradient.GradientDescent(**algorithm_params)
1368 else:
1369 algorithm.set_params(**algorithm_params)
1370
1371 super(RidgeLogisticRegression, self).__init__(algorithm=algorithm,
1372 class_weight=class_weight)
1373
1374 self.penalty_start = max(0, int(penalty_start))
1375 self.mean = bool(mean)
1376
1378 """Return a dictionary containing all the estimator's parameters.
1379 """
1380 return {"class_weight": self.class_weight,
1381 "penalty_start": self.penalty_start,
1382 "mean": self.mean}
1383
1384 def fit(self, X, y, beta=None, sample_weight=None):
1385 """Fit the estimator to the data.
1386 """
1387 X, y = check_arrays(X, check_labels(y))
1388 if sample_weight is None:
1389 sample_weight = class_weight_to_sample_weight(self.class_weight, y)
1390 y, sample_weight = check_arrays(y, sample_weight)
1391
1392
1393 function = losses.RidgeLogisticRegression(X, y, self.l,
1394 weights=sample_weight,
1395 penalty_start=self.penalty_start,
1396 mean=self.mean)
1397
1398
1399
1400
1401
1402
1403 self.algorithm.check_compatibility(function,
1404 self.algorithm.INTERFACES)
1405
1406
1407 if beta is None:
1408 beta = self.start_vector.get_vector(X.shape[1])
1409
1410 self.beta = self.algorithm.run(function, beta)
1411
1412 return self
1413
1416 """Logistic regression (re-weighted log-likelihood aka. cross-entropy) with
1417 with L1 and L2 penalties:
1418
1419 f(beta) = -loglik / n_samples
1420 + alpha * l * ||beta||_1
1421 + alpha * ((1.0 - l) / 2) * ||beta||²_2,
1422
1423 where
1424
1425 loglik = Sum wi * (yi * log(pi) + (1 − yi) * log(1 − pi)),
1426
1427 pi = p(y=1|xi, beta) = 1 / (1 + exp(-xi'*beta)),
1428
1429 wi = weight of sample i,
1430
1431 and ||.||²_2 is the squared L2-norm.
1432
1433 Parameters
1434 ----------
1435 l : Non-negative float. The elastic net mixing parameter: the L1 penalty
is weighted by alpha * l and the squared L2 penalty by alpha * (1 - l) / 2.

alpha : Non-negative float. Scaling parameter of the regularisation.
Default is 1.
1436
1437 algorithm : ExplicitAlgorithm. The algorithm that should be applied.
1438 Should be one of:
1439 1. GradientDescent(...)
1440
1441 Default is GradientDescent(...).
1442
1443 algorithm_params : A dict. The dictionary algorithm_params contains
1444 parameters that should be set in the algorithm. Passing
1445 algorithm=MyAlgorithm(**params) is equivalent to passing
1446 algorithm=MyAlgorithm() and algorithm_params=params. Default is
1447 an empty dictionary.
1448
1449 class_weight : Dict, 'auto' or None. If 'auto', class weights will be
1450 given inverse proportional to the frequency of the class in
1451 the data. If a dictionary is given, keys are classes and values
1452 are corresponding class weights. If None is given, the class
1453 weights will be uniform.
1454
1455 penalty_start : Non-negative integer. The number of columns, variables
1456 etc., to be exempt from penalisation. Equivalently, the first
1457 index to be penalised. Default is 0, all columns are included.
1458
1459 mean : Boolean. Whether to compute the mean loss or not. Default is True,
1460 the mean loss.
1461
1462 Examples
1463 --------
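A minimal usage sketch; no outputs are asserted here, since the fitted
values depend on the FISTA run:

>>> import numpy as np
>>> import parsimony.estimators as estimators
>>> n, p = 10, 16
>>> np.random.seed(42)
>>> X = np.random.rand(n, p)
>>> y = np.random.randint(0, 2, (n, 1))
>>> enlr = estimators.ElasticNetLogisticRegression(l=0.5, alpha=1.0,
...                                                mean=False)
>>> res = enlr.fit(X, y)
>>> rate = enlr.score(X, y)  # Rate of correct classification.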
1464 """
1465 def __init__(self, l, alpha=1.0,
1466 algorithm=None, algorithm_params=dict(),
1467 class_weight=None,
1468 penalty_start=0,
1469 mean=True):
1470
1471 self.l = max(0.0, float(l))
1472 self.alpha = float(alpha)
1473
1474 if algorithm is None:
1475 algorithm = proximal.FISTA(**algorithm_params)
1476 else:
1477 algorithm.set_params(**algorithm_params)
1478
1479 super(ElasticNetLogisticRegression, self).__init__(algorithm=algorithm,
1480 class_weight=class_weight)
1481
1482 self.penalty_start = max(0, int(penalty_start))
1483 self.mean = bool(mean)
1484
1486 """Return a dictionary containing all the estimator's parameters.
1487 """
1488 return {"class_weight": self.class_weight,
1489 "penalty_start": self.penalty_start,
1490 "mean": self.mean}
1491
1492 def fit(self, X, y, beta=None, sample_weight=None):
1493 """Fit the estimator to the data.
1494 """
1495 X, y = check_arrays(X, check_labels(y))
1496 if sample_weight is None:
1497 sample_weight = class_weight_to_sample_weight(self.class_weight, y)
1498 y, sample_weight = check_arrays(y, sample_weight)
1499
1500 function = functions.CombinedFunction()
1501 function.add_loss(losses.LogisticRegression(X, y, mean=self.mean))
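# The smooth squared L2 term is added as an ordinary penalty, while the
# non-smooth L1 term is handled through its proximal operator.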
1502 function.add_penalty(penalties.L2Squared(l=self.alpha * (1.0 - self.l),
1503 penalty_start=self.penalty_start))
1504 function.add_prox(penalties.L1(l=self.alpha * self.l,
1505 penalty_start=self.penalty_start))
1506
1507 self.algorithm.check_compatibility(function,
1508 self.algorithm.INTERFACES)
1509
1510
1511 if beta is None:
1512 beta = self.start_vector.get_vector(X.shape[1])
1513
1514 self.beta = self.algorithm.run(function, beta)
1515
1516 return self
1517
1520 """Logistic regression (re-weighted log-likelihood aka. cross-entropy)
1521 with L1, L2 and TV penalties:
1522
1523 f(beta) = -loglik / n_samples
1524 + l1 * ||beta||_1
1525 + (l2 / 2) * ||beta||²_2
1526 + tv * TV(beta)
1527 where
1528 loglik = Sum wi * (yi * log(pi) + (1 − yi) * log(1 − pi)),
1529
1530 pi = p(y=1|xi, beta) = 1 / (1 + exp(-xi'*beta)),
1531
1532 wi = weight of sample i.
1533
1534 Parameters
1535 ----------
1536 l1 : Non-negative float. The Lagrange multiplier, or regularisation
1537 constant, for the L1 penalty.
1538
1539 l2 : Non-negative float. The Lagrange multiplier, or regularisation
1540 constant, for the ridge (L2) penalty.
1541
1542 tv : Non-negative float. The Lagrange multiplier, or regularisation
1543 constant, of the TV function.
1544
1545 A : Numpy or (usually) scipy.sparse array. The linear operator for the
1546 smoothed total variation Nesterov function. A must be given.
1547
1548 mu : Non-negative float. The regularisation constant for the smoothing.
1549
1550 algorithm : ExplicitAlgorithm. The algorithm that should be applied.
1551 Should be one of:
1552 1. CONESTA(...)
1553 2. StaticCONESTA(...)
1554 3. FISTA(...)
1555 4. ISTA(...)
1556
1557 Default is CONESTA(...).
1558
1559 algorithm_params : A dict. The dictionary algorithm_params contains
1560 parameters that should be set in the algorithm. Passing
1561 algorithm=CONESTA(**params) is equivalent to passing
1562 algorithm=CONESTA() and algorithm_params=params. Default is an
1563 empty dictionary.
1564
1565 class_weight : Dict, 'auto' or None. If 'auto', class weights will be
1566 given inverse proportional to the frequency of the class in
1567 the data. If a dictionary is given, keys are classes and values
1568 are corresponding class weights. If None is given, the class
1569 weights will be uniform.
1570
1571 penalty_start : Non-negative integer. The number of columns, variables
1572 etc., to be exempt from penalisation. Equivalently, the first
1573 index to be penalised. Default is 0, all columns are included.
1574
1575 mean : Boolean. Whether to compute the mean loss or not. Default is True,
1576 the mean loss.
1577
1578 Examples
1579 --------
1580 >>> import numpy as np
1581 >>> import parsimony.estimators as estimators
1582 >>> import parsimony.algorithms.proximal as proximal
1583 >>> import parsimony.functions.nesterov.tv as total_variation
1584 >>> shape = (1, 4, 4)
1585 >>> n = 10
1586 >>> p = shape[0] * shape[1] * shape[2]
1587 >>>
1588 >>> np.random.seed(42)
1589 >>> X = np.random.rand(n, p)
1590 >>> y = np.random.randint(0, 2, (n, 1))
1591 >>> l1 = 0.1 # L1 coefficient
1592 >>> l2 = 0.9 # Ridge coefficient
1593 >>> tv = 1.0 # TV coefficient
1594 >>> A, n_compacts = total_variation.linear_operator_from_shape(shape)
1595 >>> lr = estimators.LogisticRegressionL1L2TV(l1, l2, tv, A,
1596 ... algorithm=proximal.StaticCONESTA(max_iter=1000),
1597 ... mean=False)
1598 >>> res = lr.fit(X, y)
1599 >>> error = lr.score(X, y)
1600 >>> print "error = ", error
1601 error = 0.7
1602 >>> lr = estimators.LogisticRegressionL1L2TV(l1, l2, tv, A,
1603 ... algorithm=proximal.FISTA(max_iter=1000),
1604 ... mean=False)
1605 >>> lr = lr.fit(X, y)
1606 >>> error = lr.score(X, y)
1607 >>> print "error = ", error
1608 error = 0.7
1609 >>> lr = estimators.LogisticRegressionL1L2TV(l1, l2, tv, A,
1610 ... algorithm=proximal.ISTA(max_iter=1000),
1611 ... mean=False)
1612 >>> lr = lr.fit(X, y)
1613 >>> error = lr.score(X, y)
1614 >>> print "error = ", error
1615 error = 0.7
1616 """
1617 def __init__(self, l1, l2, tv,
1618 A=None, mu=consts.TOLERANCE,
1619 algorithm=None, algorithm_params=dict(),
1620 class_weight=None,
1621 penalty_start=0,
1622 mean=True):
1651
1653 """Return a dictionary containing all the estimator's parameters.
1654 """
1655 return {"l1": self.l1, "l2": self.l2, "tv": self.tv,
1656 "A": self.A, "mu": self.mu, "class_weight": self.class_weight,
1657 "penalty_start": self.penalty_start, "mean": self.mean}
1658
1659 def fit(self, X, y, beta=None, sample_weight=None):
1660 """Fit the estimator to the data.
1661 """
1662 X, y = check_arrays(X, check_labels(y))
1663 if sample_weight is None:
1664 sample_weight = class_weight_to_sample_weight(self.class_weight, y)
1665 y, sample_weight = check_arrays(y, sample_weight)
1666
1667
1668 function = functions.LogisticRegressionL1L2TV(X, y,
1669 self.l1, self.l2, self.tv,
1670 A=self.A,
1671 weights=sample_weight,
1672 penalty_start=self.penalty_start,
1673 mean=self.mean)
1674
1675 self.algorithm.check_compatibility(function,
1676 self.algorithm.INTERFACES)
1677
1678
1679 if beta is None:
1680 beta = self.start_vector.get_vector(X.shape[1])
1681
1682 if self.mu is None:
1683 self.mu = function.estimate_mu(beta)
1684 else:
1685 self.mu = float(self.mu)
1686
1687 function.set_params(mu=self.mu)
1688 self.beta = self.algorithm.run(function, beta)
1689
1690 return self
1691
1694 """Logistic regression (re-weighted log-likelihood aka. cross-entropy)
1695 with L1, L2 and Group Lasso penalties:
1696
1697 f(beta) = -loglik / n_samples
1698 + l1 * ||beta||_1
1699 + (l2 / (2 * n)) * ||beta||²_2
1700 + gl * GL(beta)
1701 where
1702 loglik = Sum wi * (yi * log(pi) + (1 − yi) * log(1 − pi)),
1703
1704 pi = p(y=1|xi, beta) = 1 / (1 + exp(-xi'*beta)),
1705
1706 wi = weight of sample i.
1707
1708 Parameters
1709 ----------
1710 l1 : Non-negative float. The Lagrange multiplier, or regularisation
1711 constant, for the L1 penalty.
1712
1713 l2 : Non-negative float. The Lagrange multiplier, or regularisation
1714 constant, for the ridge (L2) penalty.
1715
1716 gl : Non-negative float. The Lagrange multiplier, or regularisation
1717 constant, of the group lasso function.
1718
1719 A : Numpy or (usually) scipy.sparse array. The linear operator for the
1720 smoothed group lasso Nesterov function. A must be given.
1721
1722 mu : Non-negative float. The regularisation constant for the Nesterov
1723 smoothing.
1724
1725 algorithm : ExplicitAlgorithm. The algorithm that should be applied.
1726 Should be one of:
1727 1. FISTA(...)
1728 2. ISTA(...)
1729 3. StaticCONESTA(...)
1730
1731 Default is FISTA(...).
1732
1733 algorithm_params : A dict. The dictionary algorithm_params contains
1734 parameters that should be set in the algorithm. Passing
1735 algorithm=FISTA(**params) is equivalent to passing
1736 algorithm=FISTA() and algorithm_params=params. Default is an empty
1737 dictionary.
1738
1739 class_weight : Dict, 'auto' or None. If 'auto', class weights will be
1740 given inverse proportional to the frequency of the class in
1741 the data. If a dictionary is given, keys are classes and values
1742 are corresponding class weights. If None is given, the class
1743 weights will be uniform.
1744
1745 penalty_start : Non-negative integer. The number of columns, variables
1746 etc., to be exempt from penalisation. Equivalently, the first
1747 index to be penalised. Default is 0, all columns are included.
1748
1749 mean : Boolean. Whether to compute the mean loss or not. Default is True,
1750 the mean loss.
1751
1752 Examples
1753 --------
1754 >>> import numpy as np
1755 >>> import parsimony.estimators as estimators
1756 >>> import parsimony.algorithms.proximal as proximal
1757 >>> import parsimony.functions.nesterov.gl as group_lasso
1758 >>>
1759 >>> np.random.seed(42)
1760 >>>
1761 >>> n, p = 10, 16
1762 >>> groups = [range(0, p / 2), range(p / 2, p)]
1763 >>> weights = [1.5, 0.5]
1764 >>> A = group_lasso.linear_operator_from_groups(p, groups=groups,
1765 ... weights=weights)
1766 >>>
1767 >>> X = np.random.rand(n, p)
1768 >>> y = np.random.randint(0, 2, (n, 1))
1769 >>> l1 = 0.1 # L1 coefficient
1770 >>> l2 = 0.9 # Ridge coefficient
1771 >>> gl = 1.0 # GL coefficient
1772 >>> lr = estimators.LogisticRegressionL1L2GL(l1, l2, gl, A=A,
1773 ... algorithm=proximal.StaticCONESTA(max_iter=1000),
1774 ... mean=False)
1775 >>> res = lr.fit(X, y)
1776 >>> error = lr.score(X, y)
1777 >>> print "error = ", error
1778 error = 0.7
1779 >>> lr = estimators.LogisticRegressionL1L2GL(l1, l2, gl, A=A,
1780 ... algorithm=proximal.FISTA(max_iter=1000),
1781 ... mean=False)
1782 >>> lr = lr.fit(X, y)
1783 >>> error = lr.score(X, y)
1784 >>> print "error = ", error
1785 error = 0.7
1786 >>> lr = estimators.LogisticRegressionL1L2GL(l1, l2, gl, A,
1787 ... algorithm=proximal.ISTA(max_iter=1000),
1788 ... mean=False)
1789 >>> lr = lr.fit(X, y)
1790 >>> error = lr.score(X, y)
1791 >>> print "error = ", error
1792 error = 0.7
1793 """
1794 def __init__(self, l1, l2, gl,
1795 A=None, mu=consts.TOLERANCE,
1796 weigths=None,
1797 algorithm=None, algorithm_params=dict(),
1798 class_weight=None,
1799 penalty_start=0,
1800 mean=True):
1833
1835 """Returns a dictionary containing all the estimator's parameters.
1836 """
1837 return {"l1": self.l1, "l2": self.l2, "gl": self.gl,
1838 "A": self.A, "mu": self.mu, "class_weight": self.class_weight,
1839 "penalty_start": self.penalty_start, "mean": self.mean}
1840
1841 def fit(self, X, y, beta=None, sample_weight=None):
1842 """Fit the estimator to the data.
1843 """
1844 X, y = check_arrays(X, check_labels(y))
1845 if sample_weight is None:
1846 sample_weight = class_weight_to_sample_weight(self.class_weight, y)
1847 y, sample_weight = check_arrays(y, sample_weight)
1848 function = functions.LogisticRegressionL1L2GL(X, y,
1849 self.l1, self.l2, self.gl,
1850 A=self.A,
1851 weights=sample_weight,
1852 penalty_start=self.penalty_start,
1853 mean=self.mean)
1854
1855 self.algorithm.check_compatibility(function,
1856 self.algorithm.INTERFACES)
1857
1858
1859 if beta is None:
1860 beta = self.start_vector.get_vector(X.shape[1])
1861
1862 if self.mu is None:
1863 self.mu = function.estimate_mu(beta)
1864 else:
1865 self.mu = float(self.mu)
1866
1867 function.set_params(mu=self.mu)
1868 self.beta = self.algorithm.run(function, beta)
1869
1870 return self
1871
1874 """Linear regression with L2 and simultaneously smoothed L1 and TV
1875 penalties:
1876
1877 f(beta, X, y) = (1 / (2 * n)) * ||Xbeta - y||²_2
1878 + (l2 / 2) * ||beta||²_2
1879 + L1TV(beta),
1880
1881 where L1TV is l1 * L1(beta) + tv * TV(beta) smoothed together by Nesterov's
1882 smoothing.
1883
1884 Parameters
1885 ----------
1886 l2 : Non-negative float. The Lagrange multiplier, or regularisation
1887 constant, for the ridge (L2) penalty.
1888
1889 l1 : Non-negative float. The Lagrange multiplier, or regularisation
1890 constant, for the L1 penalty.
1891
1892 tv : Non-negative float. The Lagrange multiplier, or regularisation
1893 constant, of the TV function.
1894
1895 A : A list or tuple with 4 elements of (usually sparse) arrays. The linear
1896 operator for the smoothed L1+TV. The first element must be the
1897 linear operator for L1 and the following three for TV. May not be
1898 None.
1899
1900 algorithm : ExplicitAlgorithm. The algorithm that should be applied.
1901 Should be one of:
1902 1. ExcessiveGapMethod(...)
1903
1904 Default is ExcessiveGapMethod(...).
1905
1906 algorithm_params : A dict. The dictionary algorithm_params contains
1907 parameters that should be set in the algorithm. Passing
1908 algorithm=ExcessiveGapMethod(**params) is equivalent to passing
1909 algorithm=ExcessiveGapMethod() and algorithm_params=params. Default
1910 is an empty dictionary.
1911
1912 penalty_start : Non-negative integer. The number of columns, variables
1913 etc., to be exempt from penalisation. Equivalently, the first
1914 index to be penalised. Default is 0, all columns are included.
1915
1916 mean : Boolean. Whether to compute the squared loss or the mean squared
1917 loss. Default is True, the mean squared loss.
1918
1919 Examples
1920 --------
1921 >>> import numpy as np
1922 >>> import parsimony.estimators as estimators
1923 >>> import parsimony.algorithms.primaldual as primaldual
1924 >>> import parsimony.functions.nesterov.l1tv as l1tv
1925 >>> shape = (1, 4, 4)
1926 >>> n = 10
1927 >>> p = shape[0] * shape[1] * shape[2]
1928 >>>
1929 >>> np.random.seed(42)
1930 >>> X = np.random.rand(n, p)
1931 >>> y = np.random.rand(n, 1)
1932 >>> l1 = 0.1 # L1 coefficient
1933 >>> l2 = 0.9 # Ridge coefficient
1934 >>> tv = 1.0 # TV coefficient
1935 >>> A = l1tv.linear_operator_from_shape(shape, p, penalty_start=0)
1936 >>> lr = estimators.LinearRegressionL2SmoothedL1TV(l2, l1, tv, A,
1937 ... algorithm=primaldual.ExcessiveGapMethod(),
1938 ... algorithm_params=dict(max_iter=1000),
1939 ... mean=False)
1940 >>> res = lr.fit(X, y)
1941 >>> error = lr.score(X, y)
1942 >>> round(error, 14)
1943 0.06837304969156
1944 """
1945 def __init__(self, l2, l1, tv,
1946 A=None,
1947 algorithm=None, algorithm_params=dict(),
1948 penalty_start=0,
1949 mean=True):
1950
1951 if algorithm is None:
1952 algorithm = primaldual.ExcessiveGapMethod(**algorithm_params)
1953 else:
1954 algorithm.set_params(**algorithm_params)
1955
1956 super(LinearRegressionL2SmoothedL1TV, self).__init__(
1957 algorithm=algorithm)
1958
1959 self.l2 = max(0.0, float(l2))
1960 self.l1 = max(0.0, float(l1))
1961 self.tv = max(0.0, float(tv))
1962
1963 if self.l2 < consts.TOLERANCE:
1964 warnings.warn("The ridge parameter should be non-zero.")
1965
1966 if A is None:
1967 raise TypeError("The A matrix may not be None.")
1968 self.A = A
1969
1970 self.penalty_start = max(0, int(penalty_start))
1971 self.mean = bool(mean)
1972
1974 """Returns a dictionary containing all the estimator's parameters.
1975 """
1976 return {"l1": self.l1, "l2": self.l2, "tv": self.tv, "A": self.A,
1977 "penalty_start": self.penalty_start, "mean": self.mean}
1978
1979 def fit(self, X, y, beta=None):
1980 """Fit the estimator to the data.
1981 """
1982 X, y = check_arrays(X, y)
1983 function = functions.LinearRegressionL2SmoothedL1TV(X, y,
1984 self.l2, self.l1, self.tv,
1985 A=self.A,
1986 penalty_start=self.penalty_start,
1987 mean=self.mean)
1988
1989 self.algorithm.check_compatibility(function,
1990 self.algorithm.INTERFACES)
1991
1992 self.beta = self.algorithm.run(function, beta)
1993
1994 return self
1995
1997 """Return the mean squared error of the estimator.
1998 """
1999 n, p = X.shape
2000 y_hat = np.dot(X, self.beta)
2001
2002 return np.sum((y_hat - y) ** 2.0) / float(n)
2003
2006 """Estimator for PLS regression
2007
2008 f(beta, X, Y) = -Cov(X.beta, Y),
2009
2010 where Cov(., .) is the covariance.
2011
2012 Parameters
2013 ----------
2014 K : Positive integer. The number of components to compute.
2015
2016 algorithm : OR(ImplicitAlgorithm, ExplicitAlgorithm). The algorithm that
2017 should be used. Should be one of:
2018 1. PLSR()
2019 2. MultiblockFISTA(...)
2020
2021 Default is PLSR(...).
2022
2023 algorithm_params : A dict. The dictionary algorithm_params contains
2024 parameters that should be set in the algorithm. Passing
2025 algorithm=MyAlgorithm(**params) is equivalent to passing
2026 algorithm=MyAlgorithm() and algorithm_params=params. Default
2027 is an empty dictionary.
2028
2029 start_vector : BaseStartVector. Generates the start vector that will be
2030 used.
2031
2032 mean : Boolean. Whether or not to compute the mean squared error or the
2033 squared error. Default is True, compute the mean squared error.
2034
2035 Examples
2036 --------
2037 >>> import parsimony.estimators as estimators
2038 >>> import parsimony.algorithms.nipals as nipals
2039 >>> import parsimony.algorithms.multiblock as multiblock
2040 >>> import numpy as np
2041 >>> np.random.seed(42)
2042 >>>
2043 >>> n, p = 16, 10
2044 >>> X = np.random.rand(n, p)
2045 >>> y = np.random.rand(n, 1)
2046 >>> plsr = estimators.PLSRegression(K=4, algorithm=nipals.PLSR(),
2047 ... algorithm_params=dict(max_iter=100))
2048 >>> error = plsr.fit(X, y).score(X, y)
2049 >>> print "error = ", error
2050 error = 0.0222345224457
2051 """
2067
2068 self.K = max(1, int(K))
2069
2070 if algorithm is None:
2071 algorithm = nipals.PLSR(**algorithm_params)
2072 else:
2073 algorithm.set_params(**algorithm_params)
2074
2075 super(PLSRegression, self).__init__(algorithm=algorithm,
2076 start_vector=start_vector)
2077
2078 self.unbiased = bool(unbiased)
2079 self.mean = bool(mean)
2080
2082 """Return a dictionary containing the estimator's parameters
2083 """
2084 return {"unbiased": self.unbiased}
2085
2086 def fit(self, X, Y, w=None):
2087 """Fit the estimator to the data.
2088 """
2089 X, Y = check_arrays(X, Y)
2090
2091 n, p = X.shape
2092 _, q = Y.shape
2093
2094 rankone = deflation.RankOneDeflation()
2095
2096 self.W = np.zeros((p, self.K))
2097 self.T = np.zeros((n, self.K))
2098 self.C = np.zeros((q, self.K))
2099 self.U = np.zeros((n, self.K))
2100 self.P = np.zeros((p, self.K))
2101 for k in range(self.K):
2102
2103 if isinstance(self.algorithm, bases.ExplicitAlgorithm):
2104 cov1 = mb_losses.LatentVariableCovariance([X, Y],
2105 unbiased=self.unbiased)
2106 cov2 = mb_losses.LatentVariableCovariance([Y, X],
2107 unbiased=self.unbiased)
2108
2109 l21 = penalties.L2(c=1.0)
2110 l22 = penalties.L2(c=1.0)
2111
2112 function = mb_losses.CombinedMultiblockFunction([X, Y])
2113 function.add_function(cov1, 0, 1)
2114 function.add_function(cov2, 1, 0)
2115
2116 function.add_constraint(l21, 0)
2117 function.add_constraint(l22, 1)
2118
2119 self.algorithm.check_compatibility(function,
2120 self.algorithm.INTERFACES)
2121
2122
2123
2124
2125 w = [self.start_vector.get_vector(X.shape[1]),
2126 self.start_vector.get_vector(Y.shape[1])]
2127
2128 print "max iter:", self.algorithm.max_iter
2129 w = self.algorithm.run(function, w)
2130 c = w[1]
2131 w = w[0]
2132 else:
2133 w, c = self.algorithm.run([X, Y], w if k == 0 else None)
2134
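# Standard PLS updates: X-scores t, Y-weights c, Y-scores u and X-loadings
# p, each normalised by the corresponding squared norm when it is
# numerically non-zero.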
2135 t = np.dot(X, w)
2136
2137 tt = np.dot(t.T, t)[0, 0]
2138 c = np.dot(Y.T, t)
2139 if tt > consts.TOLERANCE:
2140 c *= 1.0 / tt
2141
2142 cc = np.dot(c.T, c)[0, 0]
2143 u = np.dot(Y, c)
2144 if cc > consts.TOLERANCE:
2145 u *= 1.0 / cc
2146
2147 p = np.dot(X.T, t)
2148 if tt > consts.TOLERANCE:
2149 p *= 1.0 / tt
2150
2151 self.W[:, [k]] = w
2152 self.T[:, [k]] = t
2153 self.C[:, [k]] = c
2154 self.U[:, [k]] = u
2155 self.P[:, [k]] = p
2156
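# Deflate X with the rank-one approximation t * p' before extracting the
# next component.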
2157 if k < self.K - 1:
2158 X = rankone.deflate(X, t, p)
2159
2160
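# Rotate the weights to compensate for the deflations, Ws = W * pinv(P' * W),
# so that the regression coefficients become beta = Ws * C'.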
2161 self.Ws = np.dot(self.W, np.linalg.pinv(np.dot(self.P.T, self.W)))
2162
2163 self.beta = np.dot(self.Ws, self.C.T)
2164
2165 return self
2166
2168 """Returns the (mean) squared error of the estimator.
2169 """
2170 X, Y = check_arrays(X, Y)
2171
2172 Yhat = np.dot(X, self.beta)
2173 err = maths.normFro(Yhat - Y) ** 2.0
2174
2175 if self.mean:
2176 n, p = X.shape
2177 err /= float(n)
2178
2179 return err
2180
2183 """Estimator for sparse PLS regression
2184
2185 f(w, c, X, Y) = -Cov(X.w, Y.c) + l[0] * |w|_1 + l[1] * |c|_1,
2186
2187 where Cov(., .) is the covariance and |.|_1 is the L1 norm.
2188
2189 Parameters
2190 ----------
2191 l : List or tuple of two non-negative floats. The Lagrange multipliers, or
2192 regularisation constants, for the X and Y blocks, respectively.
2193
2194 K : Positive integer. The number of components to compute.
2195
2196 algorithm : OR(ImplicitAlgorithm, ExplicitAlgorithm). The algorithm that
2197 should be used. Should be one of:
2198 1. SparsePLSR()
2199 2. MultiblockFISTA(...)
2200
2201 Default is SparsePLSR(...).
2202
2203 algorithm_params : A dict. The dictionary algorithm_params contains
2204 parameters that should be set in the algorithm. Passing
2205 algorithm=MyAlgorithm(**params) is equivalent to passing
2206 algorithm=MyAlgorithm() and algorithm_params=params. Default
2207 is an empty dictionary.
2208
2209 start_vector : BaseStartVector. Generates the start vector that will be
2210 used.
2211
2212 mean : Boolean. Whether or not to compute the mean squared error or the
2213 squared error. Default is True, compute the mean squared error.
2214
2215 Examples
2216 --------
2217 >>> import parsimony.estimators as estimators
2218 >>> import parsimony.algorithms.nipals as nipals
2219 >>> import parsimony.algorithms.multiblock as multiblock
2220 >>> import numpy as np
2221 >>> np.random.seed(42)
2222 >>>
2223 >>> n, p = 16, 10
2224 >>> X = np.random.rand(n, p)
2225 >>> y = np.random.rand(n, 1)
2226 >>> plsr = estimators.SparsePLSRegression(l=[3.0, 0.0], K=1,
2227 ... algorithm=nipals.SparsePLSR(),
2228 ... algorithm_params=dict(max_iter=100))
2229 >>> error = plsr.fit(X, y).score(X, y)
2230 >>> print plsr.W
2231 [[ 0.37053417]
2232 [ 0.54969643]
2233 [ 0.29593809]
2234 [ 0.2937247 ]
2235 [ 0.49989677]
2236 [ 0.0895912 ]
2237 [ 0. ]
2238 [ 0.35883331]
2239 [ 0. ]
2240 [ 0. ]]
2241 >>> print plsr.C
2242 [[ 0.32949094]]
2243 >>> print "error = ", error
2244 error = 0.0547513077301
2245 """
2273
2274 self.l = [max(0.0, float(l[0])),
2275 max(0.0, float(l[1]))]
2276 self.K = max(1, int(K))
2277
2278 if algorithm is None:
2279 algorithm = nipals.SparsePLSR(**algorithm_params)
2280 else:
2281 algorithm.set_params(**algorithm_params)
2282
2283 super(SparsePLSRegression, self).__init__(algorithm=algorithm,
2284 start_vector=start_vector)
2285
2286 self.unbiased = bool(unbiased)
2287 self.mean = bool(mean)
2288
2290 """Return a dictionary containing the estimator's parameters
2291 """
2292 return {"l": self.l, "K": self.K, "unbiased": self.unbiased,
2293 "mean": self.mean}
2294
2295 def fit(self, X, Y, w=None):
2296 """Fit the estimator to the data.
2297 """
2298 X, Y = check_arrays(X, Y)
2299
2300 n, p = X.shape
2301 _, q = Y.shape
2302
2303 rankone = deflation.RankOneDeflation()
2304
2305 self.W = np.zeros((p, self.K))
2306 self.T = np.zeros((n, self.K))
2307 self.C = np.zeros((q, self.K))
2308 self.U = np.zeros((n, self.K))
2309 self.P = np.zeros((p, self.K))
2310 for k in range(self.K):
2311
2312 if isinstance(self.algorithm, bases.ExplicitAlgorithm):
2313 cov1 = mb_losses.LatentVariableCovariance([X, Y],
2314 unbiased=self.unbiased)
2315 cov2 = mb_losses.LatentVariableCovariance([Y, X],
2316 unbiased=self.unbiased)
2317
2318 l1l2_1 = penalties.L1L2Squared(self.l[0], 1.0)
2319 l1l2_2 = penalties.L1L2Squared(self.l[1], 1.0)
2320
2321
2322
2323
2324
2325 function = mb_losses.CombinedMultiblockFunction([X, Y])
2326 function.add_function(cov1, 0, 1)
2327 function.add_function(cov2, 1, 0)
2328
2329 function.add_penalty(l1l2_1, 0)
2330 function.add_penalty(l1l2_2, 1)
2331
2332
2333
2334
2335
2336
2337 self.algorithm.check_compatibility(function,
2338 self.algorithm.INTERFACES)
2339
2340
2341
2342
2343 w = [self.start_vector.get_vector(X.shape[1]),
2344 self.start_vector.get_vector(Y.shape[1])]
2345
2346 print "max iter:", self.algorithm.max_iter
2347 w = self.algorithm.run(function, w)
2348 c = w[1]
2349 w = w[0]
2350 else:
2351 self.algorithm.set_params(l=self.l)
2352 w, c = self.algorithm.run([X, Y], w if k == 0 else None)
2353
2354 t = np.dot(X, w)
2355
2356 tt = np.dot(t.T, t)[0, 0]
2357 c = np.dot(Y.T, t)
2358 if tt > consts.TOLERANCE:
2359 c *= 1.0 / tt
2360
2361 cc = np.dot(c.T, c)[0, 0]
2362 u = np.dot(Y, c)
2363 if cc > consts.TOLERANCE:
2364 u *= 1.0 / cc
2365
2366 p = np.dot(X.T, t)
2367 if tt > consts.TOLERANCE:
2368 p *= 1.0 / tt
2369
2370 self.W[:, [k]] = w
2371 self.T[:, [k]] = t
2372 self.C[:, [k]] = c
2373 self.U[:, [k]] = u
2374 self.P[:, [k]] = p
2375
2376 if k < self.K - 1:
2377 X = rankone.deflate(X, t, p)
2378
2379
2380 self.Ws = np.dot(self.W, np.linalg.pinv(np.dot(self.P.T, self.W)))
2381
2382 self.beta = np.dot(self.Ws, self.C.T)
2383
2384 return self
2385
2387 """Returns the (mean) squared error of the estimator.
2388 """
2389 X, Y = check_arrays(X, Y)
2390
2391 Yhat = np.dot(X, self.beta)
2392 err = maths.normFro(Yhat - Y) ** 2.0
2393
2394 if self.mean:
2395 n, p = X.shape
2396 err /= float(n)
2397
2398 return err
2399
2402 """Estimator for the clustering problem, i.e. for
2403
2404 f(C, mu) = sum_{i=1}^K sum_{x in C_i} |x - mu_i|²,
2405
2406 where C = {C_1, ..., C_K} is a set of sets of points, mu_i is the mean of
2407 C_i and |.|² is the squared Euclidean norm.
2408
2409 This loss function is known as the within-cluster sum of squares.
2410
2411 Parameters
2412 ----------
2413 K : Positive integer. The number of clusters to find.
2414
2415 algorithm : Currently only the K-means algorithm (Lloyd's algorithm). The
2416 algorithm that should be used. Should be one of:
2417 1. KMeans(...)
2418
2419 Default is KMeans(...).
2420
2421 algorithm_params : A dictionary. The dictionary algorithm_params contains
2422 parameters that should be set in the algorithm. Passing
2423 algorithm=MyAlgorithm(**params) is equivalent to passing
2424 algorithm=MyAlgorithm() and algorithm_params=params. Default
2425 is an empty dictionary.
2426
2427 Examples
2428 --------
2429 >>> import parsimony.estimators as estimators
2430 >>> import parsimony.algorithms.cluster as cluster
2431 >>> import numpy as np
2432 >>> np.random.seed(1337)
2433 >>>
2434 >>> K = 3
2435 >>> n, p = 150, 2
2436 >>> X = np.vstack((2 * np.random.rand(n / 3, 2) - 2,
2437 ... 0.5 * np.random.rand(n / 3, 2),
2438 ... np.hstack([0.5 * np.random.rand(n / 3, 1) - 1,
2439 ... 0.5 * np.random.rand(n / 3, 1)])))
2440 >>> lloyds = cluster.KMeans(K, max_iter=100, repeat=10)
2441 >>> KMeans = estimators.Clustering(K, algorithm=lloyds)
2442 >>> error = KMeans.fit(X).score(X)
2443 >>> print error
2444 27.6675491884
2445 >>>
2446 >>> #import matplotlib.pyplot as plot
2447 >>> #mus = KMeans._means
2448 >>> #plot.plot(X[:, 0], X[:, 1], '*')
2449 >>> #plot.plot(mus[:, 0], mus[:, 1], 'rs')
2450 >>> #plot.show()
2451 """
2452 def __init__(self, K, algorithm=None, algorithm_params=dict()):
2453
2454 self.K = max(1, int(K))
2455
2456 if algorithm is None:
2457 algorithm = cluster.KMeans(**algorithm_params)
2458 else:
2459 algorithm.set_params(**algorithm_params)
2460
2461 super(Clustering, self).__init__(algorithm=algorithm)
2462
2464 """Return a dictionary containing the estimator's own input parameters.
2465 """
2466 return {"K": self.K}
2467
2468 def fit(self, X, means=None):
2469 """Fit the estimator to the data.
2470 """
2471 X = check_arrays(X)
2472
2473 self._means = self.algorithm.run(X)
2474
2475 return self
2476
2478 """Perform prediction using the fitted parameters.
2479
2480 Finds the closest cluster centre for each point, i.e. assigns each
2481 point to a cluster.
2482
2483 Returns
2484 -------
2485 closest : A list with one element per sample (row of X): the index of
the cluster that each point was assigned to.
2486 """
2487 X = check_arrays(X)
2488
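# Squared Euclidean distance from each sample to every cluster mean; each
# sample is then assigned to its closest mean.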
2489 dists = np.zeros((X.shape[0], self.K))
2490 i = 0
2491 for mu in self._means:
2492 dist = np.sum((X - mu) ** 2.0, axis=1)
2493 dists[:, i] = dist
2494 i += 1
2495
2496 closest = np.argmin(dists, axis=1).tolist()
2497
2498 return closest
2499
2501 """Returns the estimator's fitted means.
2502 """
2503 return self._means
2504
2506 """Computes the within-cluster sum of squares.
2507 """
2508 X = check_arrays(X)
2509
2510 closest = np.array(self.predict(X))
2511 wcss = 0.0
2512 for i in xrange(self.K):
2513 idx = closest == i
2514 wcss += np.sum((X[idx, :] - self._means[i, :]) ** 2.0)
2515
2516 return wcss
2517
2518
2519 if __name__ == "__main__":
2520 import doctest
2521 doctest.testmod()
2522