1
2 """Estimators encapsulates an algorithm with (possibly) a corresponding loss
3 function and penalties.
4
5 Created on Sat Nov 2 15:19:17 2013
6
7 Copyright (c) 2013-2014, CEA/DSV/I2BM/Neurospin. All rights reserved.
8
9 @author: Tommy Löfstedt, Edouard Duchesnay
10 @email: lofstedt.tommy@gmail.com, edouard.duchesnay@cea.fr
11 @license: BSD 3-clause.
12 """
13 import abc
14 import warnings
15
16 import numpy as np
17
18 import parsimony.utils.consts as consts
19 import parsimony.utils.maths as maths
20 import parsimony.functions as functions
21 import parsimony.functions.losses as losses
22 import parsimony.functions.multiblock.losses as mb_losses
23 import parsimony.functions.penalties as penalties
24 import parsimony.utils.start_vectors as start_vectors
25 import parsimony.utils.linalgs as linalgs
26 import parsimony.algorithms.bases as bases
27 import parsimony.algorithms.cluster as cluster
28 import parsimony.algorithms.gradient as gradient
29 import parsimony.algorithms.proximal as proximal
30 import parsimony.algorithms.primaldual as primaldual
31 import parsimony.algorithms.nipals as nipals
32 import parsimony.algorithms.deflation as deflation
33 from parsimony.utils import check_arrays
34 from parsimony.utils import class_weight_to_sample_weight, check_labels
35
36 __all__ = ["BaseEstimator",
37 "RegressionEstimator", "LogisticRegressionEstimator",
38
39 "LinearRegression", "RidgeRegression", "Lasso", "ElasticNet",
40
41 "LinearRegressionL1L2TV",
42 "LinearRegressionL1L2GL",
43
44 "LogisticRegression",
45 "ElasticNetLogisticRegression",
46 "LogisticRegressionL1L2TV",
47 "LogisticRegressionL1L2GL",
48
49 "LinearRegressionL2SmoothedL1TV"]
53 """Base class for estimators.
54
55 Parameters
56 ----------
57 algorithm : BaseAlgorithm. The algorithm that will be used.
58 """
59 __metaclass__ = abc.ABCMeta
60
62
63 self.algorithm = algorithm
64
66 """Set the given input parameters in the estimator.
67 """
68 for k in kwargs:
69 self.__setattr__(k, kwargs[k])
70
71 @abc.abstractmethod
73 """Return a dictionary containing the estimator's own input parameters.
74 """
75 raise NotImplementedError('Abstract method "get_params" must be '
76 'specialised!')
77
79 """Fit the estimator to the data.
80 """
81 raise NotImplementedError('Abstract method "fit" must be '
82 'specialised!')
83
84 @abc.abstractmethod
86 """Perform prediction using the fitted parameters.
87 """
88 raise NotImplementedError('Abstract method "predict" must be '
89 'specialised!')
90
91
92
94 """Returns the estimator's fitted parameters, e.g. the regression
95 coefficients.
96
97 What is returned depends on the estimator. See the estimator's
98 documentation.
99 """
100 raise NotImplementedError('Abstract method "parameters" must be '
101 'specialised!')
102
103
104 @abc.abstractmethod
106 raise NotImplementedError('Abstract method "score" must be '
107 'specialised!')
108
109
111 """If an InformationAlgorithm, returns the information dictionary.
112 """
113 if not isinstance(self.algorithm, bases.InformationAlgorithm):
114 raise AttributeError("Algorithm is not an " \
115 "InformationAlgorithm.")
116
117 return self.algorithm.info_get()
118
121 """Base estimator for regression estimation.
122
123 Parameters
124 ----------
125 algorithm : ExplicitAlgorithm. The algorithm that will be applied.
126
127 start_vector : BaseStartVector. Generates the start vector that will be
128 used.
129 """
130 __metaclass__ = abc.ABCMeta
131
138
139 @abc.abstractmethod
140 def fit(self, X, y):
141 """Fit the estimator to the data.
142 """
143 raise NotImplementedError('Abstract method "fit" must be '
144 'specialised!')
145
147 """Perform prediction using the fitted parameters.
148 """
149 return np.dot(check_arrays(X), self.beta)
150
151 @abc.abstractmethod
153 """Return the score of the estimator.
154
155 The score is a measure of "goodness" of the fit to the data.
156 """
157
158
159
160 raise NotImplementedError('Abstract method "score" must be '
161 'specialised!')
162
165 """Base estimator for logistic regression estimation
166
167 Parameters
168 ----------
169 algorithm : ExplicitAlgorithm. The algorithm that will be applied.
170
171 start_vector : BaseStartVector. Generates the start vector that will be used.
172
173 class_weight : {dict, "auto"}, optional. Set the parameter weight of
174 sample belonging to class i to class_weight[i]. If not given, all
175 classes are supposed to have weight one. The "auto" mode uses the
176 values of y to automatically adjust weights inversely proportional
177 to class frequencies.
178 """
179 __metaclass__ = abc.ABCMeta
180
189
190 @abc.abstractmethod
191 def fit(self, X, y):
192 """Fit the model to the data.
193 """
194 raise NotImplementedError('Abstract method "fit" must be '
195 'specialised!')
196
198 """Return a predicted y corresponding to the X given and the beta
199 previously determined.
200 """
201 X = check_arrays(X)
202 prob = self.predict_probability(X)
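# Predict class 1 unless the estimated probability of class 1 is below 0.5.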
203 y = np.ones((X.shape[0], 1))
204 y[prob < 0.5] = 0.0
205
206 return y
207
209 X = check_arrays(X)
210 logit = np.dot(X, self.beta)
211
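# The class-1 probability is the logistic sigmoid of the linear predictor:
# p = 1 / (1 + exp(-X * beta)).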
212 prob = np.reciprocal(1.0 + np.exp(-logit))
213
214 return prob
215
217 """Rate of correct classification.
218 """
219 yhat = self.predict(X)
220 rate = np.mean(y == yhat)
221
222 return rate
223
226 """Linear regression:
227
228 f(beta, X, y) = (1 / (2 * n)) * ||Xbeta - y||²_2,
229
230 where ||.||²_2 is the squared L2-norm.
231
232 Parameters
233 ----------
234 algorithm : ExplicitAlgorithm. The algorithm that should be used.
235 Should be one of:
236 1. GradientDescent(...)
237
238 Default is GradientDescent(...).
239
240 algorithm_params : A dict. The dictionary algorithm_params contains
241 parameters that should be set in the algorithm. Passing
242 algorithm=GradientDescent(**params) is equivalent to passing
243 algorithm=GradientDescent() and algorithm_params=params. Default
244 is an empty dictionary.
245
246 start_vector : BaseStartVector. Generates the start vector that will be
247 used.
248
249 mean : Boolean. Whether to compute the squared loss or the mean squared
250 loss. Default is True, the mean squared loss.
251
252 Examples
253 --------
254 >>> import numpy as np
255 >>> import parsimony.estimators as estimators
256 >>> import parsimony.algorithms.gradient as gradient
257 >>>
258 >>> np.random.seed(42)
259 >>>
260 >>> n = 10
261 >>> p = 16
262 >>> X = np.random.rand(n, p)
263 >>> y = np.random.rand(n, 1)
264 >>> lr = estimators.LinearRegression(algorithm=gradient.GradientDescent(),
265 ... algorithm_params=dict(max_iter=1000),
266 ... mean=False)
267 >>> error = lr.fit(X, y).score(X, y)
268 >>> print "error = ", error
269 error = 0.0116466703591
270 """
284
286 """Return a dictionary containing the estimator's parameters
287 """
288 return {"mean": self.mean}
289
290 def fit(self, X, y, beta=None):
307
309 """Returns the (mean) squared error of the estimator.
310 """
311 X, y = check_arrays(X, y)
312 n, p = X.shape
313 y_hat = np.dot(X, self.beta)
314 err = np.sum((y_hat - y) ** 2.0)
315 if self.mean:
316 err /= float(n)
317
318 return err
319
322 """Linear regression with an L2 penalty. Represents the function:
323
324 f(beta, X, y) = (1 / (2 * n)) * ||X * beta - y||²_2
325 + (l / 2) * ||beta||²_2,
326
327 where ||.||²_2 is the squared L2-norm.
328
329 Parameters
330 ----------
331 l : Non-negative float. The L2 regularisation parameter.
332
333 algorithm : ExplicitAlgorithm. The algorithm that should be applied.
334 Should be one of:
335 1. FISTA(...)
336 2. ISTA(...)
337
338 Default is FISTA(...).
339
340 algorithm_params : A dict. The dictionary algorithm_params contains
341 parameters that should be set in the algorithm. Passing
342 algorithm=FISTA(**params) is equivalent to passing
343 algorithm=FISTA() and algorithm_params=params. Default is
344 an empty dictionary.
345
346 start_vector : BaseStartVector. Generates the start vector that will be
347 used.
348
349 penalty_start : Non-negative integer. The number of columns, variables
350 etc., to be exempt from penalisation. Equivalently, the first
351 index to be penalised. Default is 0, all columns are included.
352
353 mean : Boolean. Whether to compute the squared loss or the mean squared
354 loss. Default is True, the mean squared loss.
355
356 Examples
357 --------
358 >>> import numpy as np
359 >>> import parsimony.estimators as estimators
360 >>> import parsimony.algorithms.proximal as proximal
361 >>> n = 10
362 >>> p = 16
363 >>>
364 >>> np.random.seed(42)
365 >>> X = np.random.rand(n, p)
366 >>> y = np.random.rand(n, 1)
367 >>> l = 0.618 # Regularisation coefficient
368 >>> rr = estimators.RidgeRegression(l,
369 ... algorithm=proximal.FISTA(),
370 ... algorithm_params=dict(max_iter=1000),
371 ... mean=False)
372 >>> error = rr.fit(X, y).score(X, y)
373 >>> print "error = ", error
374 error = 0.377679437659
375 """
379
380 if algorithm is None:
381 algorithm = proximal.FISTA(**algorithm_params)
382 else:
383 algorithm.set_params(**algorithm_params)
384
385 super(RidgeRegression, self).__init__(algorithm=algorithm,
386 start_vector=start_vector)
387
388 self.l = float(l)
389
390 self.penalty_start = int(penalty_start)
391 self.mean = bool(mean)
392
394 """Return a dictionary containing the estimator's parameters.
395 """
396 return {"l": self.l,
397 "penalty_start": self.penalty_start, "mean": self.mean}
398
399 def fit(self, X, y, beta=None):
419
421 """Returns the (mean) squared error of the estimator.
422 """
423 X, y = check_arrays(X, y)
424 n, p = X.shape
425 y_hat = np.dot(X, self.beta)
426 err = np.sum((y_hat - y) ** 2.0)
427 if self.mean:
428 err /= float(n)
429
430 return err
431
432
433 class Lasso(RegressionEstimator):
434 """Linear regression with an L1 penalty:
435
436 f(beta, X, y) = (1 / (2 * n)) * ||Xbeta - y||²_2 + l * ||beta||_1,
437
438 where ||.||_1 is the L1-norm.
439
440 Parameters
441 ----------
442 l : Non-negative float. The Lagrange multiplier, or regularisation
443 constant, of the function.
444
445 algorithm : ExplicitAlgorithm. The algorithm that should be applied.
446 Should be one of:
447 1. FISTA(...)
448 2. ISTA(...)
449
450 Default is FISTA(...).
451
452 algorithm_params : A dict. The dictionary algorithm_params contains
453 parameters that should be set in the algorithm. Passing
454 algorithm=FISTA(**params) is equivalent to passing
455 algorithm=FISTA() and algorithm_params=params. Default is
456 an empty dictionary.
457
458 start_vector : BaseStartVector. Generates the start vector that will be
459 used.
460
461 penalty_start : Non-negative integer. The number of columns, variables
462 etc., to be exempt from penalisation. Equivalently, the first
463 index to be penalised. Default is 0, all columns are included.
464
465 mean : Boolean. Whether to compute the squared loss or the mean squared
466 loss. Default is True, the mean squared loss.
467
468 Examples
469 --------
470 >>> import numpy as np
471 >>> import parsimony.estimators as estimators
472 >>> import parsimony.algorithms.proximal as proximal
473 >>> n = 10
474 >>> p = 16
475 >>>
476 >>> np.random.seed(42)
477 >>> X = np.random.rand(n, p)
478 >>> y = np.random.rand(n, 1)
479 >>> l1 = 0.1 # L1 coefficient
480 >>> lasso = estimators.Lasso(l1,
481 ... algorithm=proximal.FISTA(),
482 ... algorithm_params=dict(max_iter=1000),
483 ... mean=False)
484 >>> error = lasso.fit(X, y).score(X, y)
485 >>> print "error = ", error
486 error = 0.395494642796
487 """
493
494 if algorithm is None:
495 algorithm = proximal.FISTA(**algorithm_params)
496 else:
497 algorithm.set_params(**algorithm_params)
498
499 super(Lasso, self).__init__(algorithm=algorithm,
500 start_vector=start_vector)
501
502 self.l = float(l)
503
504 self.penalty_start = int(penalty_start)
505 self.mean = bool(mean)
506
508 """Return a dictionary containing the estimator's parameters
509 """
510 return {"l": self.l,
511 "penalty_start": self.penalty_start,
512 "mean": self.mean}
513
514 def fit(self, X, y, beta=None):
534
536 """Returns the (mean) squared error of the estimator.
537 """
538 X, y = check_arrays(X, y)
539 n, p = X.shape
540 y_hat = np.dot(X, self.beta)
541 err = np.sum((y_hat - y) ** 2.0)
542 if self.mean:
543 err /= float(n)
544
545 return err
546
549 """Linear regression with L1 and L2 penalties. Represents the function:
550
551 f(beta, X, y) = (1 / (2 * n)) * ||X * beta - y||²_2
552 + alpha * l * ||beta||_1
553 + alpha * ((1.0 - l) / 2) * ||beta||²_2,
554
555 where ||.||²_2 is the squared L2-norm and ||.||_1 is the L1-norm.
556
557 Parameters
558 ----------
559 l : Non-negative float. The mixing parameter between the L1 and L2
560 penalties: the L1 term is weighted by alpha * l and the squared L2
term by alpha * (1 - l) / 2.
561
562 alpha : Non-negative float. Scaling parameter of the regularisation.
563 Default is 1.
564
565 algorithm : ExplicitAlgorithm. The algorithm that should be applied.
566 Should be one of:
567 1. FISTA(...)
568 2. ISTA(...)
569
570 Default is FISTA(...).
571
572 algorithm_params : A dict. The dictionary algorithm_params contains
573 parameters that should be set in the algorithm. Passing
574 algorithm=FISTA(**params) is equivalent to passing
575 algorithm=FISTA() and algorithm_params=params. Default is
576 an empty dictionary.
577
578 start_vector : BaseStartVector. Generates the start vector that will be
579 used.
580
581 penalty_start : Non-negative integer. The number of columns, variables
582 etc., to be exempt from penalisation. Equivalently, the first
583 index to be penalised. Default is 0, all columns are included.
584
585 mean : Boolean. Whether to compute the squared loss or the mean squared
586 loss. Default is True, the mean squared loss.
587
588 Examples
589 --------
590 >>> import numpy as np
591 >>> import parsimony.estimators as estimators
592 >>> import parsimony.algorithms.proximal as proximal
593 >>> n = 10
594 >>> p = 16
595 >>>
596 >>> np.random.seed(42)
597 >>> X = np.random.rand(n, p)
598 >>> y = np.random.rand(n, 1)
599 >>> l = 0.1 # Regularisation coefficient
600 >>> en = estimators.ElasticNet(l,
601 ... algorithm=proximal.FISTA(),
602 ... algorithm_params=dict(max_iter=1000),
603 ... mean=False)
604 >>> error = en.fit(X, y).score(X, y)
605 >>> print "error = ", round(error, 13)
606 error = 0.492096328053
607 """
611
612 if algorithm is None:
613 algorithm = proximal.FISTA(**algorithm_params)
614 else:
615 algorithm.set_params(**algorithm_params)
616
617 super(ElasticNet, self).__init__(algorithm=algorithm,
618 start_vector=start_vector)
619
620 self.l = float(l)
621 self.alpha = float(alpha)
622
623 self.penalty_start = int(penalty_start)
624 self.mean = bool(mean)
625
627 """Return a dictionary containing the estimator's parameters.
628 """
629 return {"l": self.l, "alpha": self.alpha,
630 "penalty_start": self.penalty_start, "mean": self.mean}
631
632 def fit(self, X, y, beta=None):
654
656 """Returns the (mean) squared error of the estimator.
657 """
658 X, y = check_arrays(X, y)
659 n, p = X.shape
660 y_hat = np.dot(X, self.beta)
661 err = np.sum((y_hat - y) ** 2.0)
662 if self.mean:
663 err /= float(n)
664
665 return err
666
669 """Linear regression with L1, L2 and TV penalties:
670
671 f(beta, X, y) = (1 / (2 * n)) * ||Xbeta - y||²_2
672 + l1 * ||beta||_1
673 + (l2 / 2) * ||beta||²_2
674 + tv * TV(beta)
675
676 Parameters
677 ----------
678 l1 : Non-negative float. The L1 regularisation parameter.
679
680 l2 : Non-negative float. The L2 regularisation parameter.
681
682 tv : Non-negative float. The total variation regularisation parameter.
683
684 A : Numpy or (usually) scipy.sparse array. The linear operator for the
685 smoothed total variation Nesterov function. A must be given.
686
687 mu : Non-negative float. The regularisation constant for the smoothing.
688
689 algorithm : ExplicitAlgorithm. The algorithm that should be applied.
690 Should be one of:
691 1. CONESTA(...)
692 2. StaticCONESTA(...)
693 3. FISTA(...)
694 4. ISTA(...)
695 5. ADMM(...)
696 6. NaiveCONESTA(...)
697
698 Default is CONESTA(...).
699
700 algorithm_params : A dict. The dictionary algorithm_params contains
701 parameters that should be set in the algorithm. Passing
702 algorithm=CONESTA(**params) is equivalent to passing
703 algorithm=CONESTA() and algorithm_params=params. Default is an
704 empty dictionary.
705
706 penalty_start : Non-negative integer. The number of columns, variables
707 etc., to be exempt from penalisation. Equivalently, the first
708 index to be penalised. Default is 0, all columns are included.
709
710 mean : Boolean. Whether to compute the squared loss or the mean squared
711 loss. Default is True, the mean squared loss.
712
713 rho : Positive float. Regularisation constant only used in ADMM. Default
714 is 1.0.
715
716 Examples
717 --------
718 >>> import numpy as np
719 >>> import parsimony.estimators as estimators
720 >>> import parsimony.algorithms.proximal as proximal
721 >>> import parsimony.functions.nesterov.tv as total_variation
722 >>> shape = (1, 4, 4)
723 >>> n = 10
724 >>> p = shape[0] * shape[1] * shape[2]
725 >>>
726 >>> np.random.seed(42)
727 >>> X = np.random.rand(n, p)
728 >>> y = np.random.rand(n, 1)
729 >>> l1 = 0.1 # L1 coefficient
730 >>> l2 = 0.9 # Ridge coefficient
731 >>> tv = 1.0 # TV coefficient
732 >>> A, n_compacts = total_variation.linear_operator_from_shape(shape)
733 >>> lr = estimators.LinearRegressionL1L2TV(l1, l2, tv, A,
734 ... algorithm=proximal.StaticCONESTA(max_iter=1000),
735 ... mean=False)
736 >>> res = lr.fit(X, y)
737 >>> round(lr.score(X, y), 13)
738 0.0683842576534
739 >>>
740 >>> lr = estimators.LinearRegressionL1L2TV(l1, l2, tv, A,
741 ... algorithm=proximal.CONESTA(max_iter=1000),
742 ... mean=False)
743 >>> res = lr.fit(X, y)
744 >>> round(lr.score(X, y), 13)
745 0.0683583406798
746 >>>
747 >>> lr = estimators.LinearRegressionL1L2TV(l1, l2, tv, A,
748 ... algorithm=proximal.FISTA(max_iter=1000),
749 ... mean=False)
750 >>> lr = lr.fit(X, y)
751 >>> round(lr.score(X, y), 13)
752 1.5817577127184
753 >>>
754 >>> lr = estimators.LinearRegressionL1L2TV(l1, l2, tv, A,
755 ... algorithm=proximal.ISTA(max_iter=1000),
756 ... mean=False)
757 >>> lr = lr.fit(X, y)
758 >>> round(lr.score(X, y), 14)
759 2.07583068899674
760 >>>
761 >>> import parsimony.functions.nesterov.l1tv as l1tv
762 >>> np.random.seed(1337)
763 >>> A = l1tv.linear_operator_from_shape(shape, p, penalty_start=0)
764 >>> lr = estimators.LinearRegressionL1L2TV(l1, l2, tv, A,
765 ... algorithm=proximal.ADMM(max_iter=1000),
766 ... mean=False)
767 >>> lr = lr.fit(X, y)
768 >>> round(lr.score(X, y), 13)
769 0.0623552412543
770 """
771 def __init__(self, l1, l2, tv,
772 A=None, mu=consts.TOLERANCE,
773 algorithm=None, algorithm_params=dict(),
774 penalty_start=0,
775 mean=True,
776 rho=1.0):
807
809 """Return a dictionary containing all the estimator's parameters
810 """
811 return {"l1": self.l1, "l2": self.l2, "tv": self.tv,
812 "A": self.A, "mu": self.mu,
813 "penalty_start": self.penalty_start, "mean": self.mean,
814 "rho": self.rho}
815
816 def fit(self, X, y, beta=None):
817 """Fit the estimator to the data.
818 """
819 X, y = check_arrays(X, y)
820
821 if isinstance(self.algorithm, proximal.ADMM):
822
823 function = functions.AugmentedLinearRegressionL1L2TV(X, y,
824 self.l1, self.l2, self.tv,
825 A=self.A,
826 rho=self.rho,
827 penalty_start=self.penalty_start,
828 mean=self.mean)
829
830
831
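# ADMM operates on the primal regression vector x together with an auxiliary
# variable r of length 2 * p (two stacked p-blocks); both are wrapped in a
# MultipartArray before being handed to the solver.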
832 p = X.shape[1]
833 if beta is None:
834 x = self.start_vector.get_vector(p)
835 r = self.start_vector.get_vector(2 * p)
836 else:
837 x = beta
838
839 r = np.vstack((np.zeros((p, 1)),
840 np.zeros((p, 1))))
841
842 xr = linalgs.MultipartArray([x, r])
843 beta = [xr, xr]
844
845 self.tv_function = functions.nesterov.tv.TotalVariation(self.tv,
846 A=self.A,
847 mu=self.mu,
848 penalty_start=self.penalty_start)
849
850 else:
851 function = functions.LinearRegressionL1L2TV(X, y,
852 self.l1, self.l2, self.tv,
853 A=self.A,
854 penalty_start=self.penalty_start,
855 mean=self.mean)
856
857 self.tv_function = function.tv
858
859
860
861 if beta is None:
862 beta = self.start_vector.get_vector(X.shape[1])
863
864 self.algorithm.check_compatibility(function,
865 self.algorithm.INTERFACES)
866
867 if self.mu is None:
868
869 self.mu = self.tv_function.estimate_mu(beta)
870
871 function.set_params(mu=self.mu)
872 self.beta = self.algorithm.run(function, beta)
873
874 if isinstance(self.algorithm, proximal.ADMM):
875 self.beta = self.beta.get_parts()[0]
876
877 return self
878
880 """Return the mean squared error of the estimator.
881 """
882 X, y = check_arrays(X, y)
883 n, p = X.shape
884 y_hat = np.dot(X, self.beta)
885
886 return np.sum((y_hat - y) ** 2.0) / float(n)
887
890 """Linear regression with L1, L2 and Group lasso penalties:
891
892 f(beta, X, y) = (1 / (2 * n)) * ||Xbeta - y||²_2
893 + l1 * ||beta||_1
894 + (l2 / 2) * ||beta||²_2
895 + gl * GL(beta)
896
897 Parameters
898 ----------
899 l1 : Non-negative float. The L1 regularisation parameter.
900
901 l2 : Non-negative float. The L2 regularisation parameter.
902
903 gl : Non-negative float. The group lasso regularisation parameter.
904
905 A : Numpy or (usually) scipy.sparse array. The linear operator for the
906 smoothed group lasso Nesterov function. A must be given.
907
908 mu : Non-negative float. The regularisation constant for the smoothing.
909
910 algorithm : ExplicitAlgorithm. The algorithm that should be applied.
911 Should be one of:
912 1. FISTA(...)
913 2. ISTA(...)
914 3. StaticCONESTA(...)
915
916 Default is FISTA(...).
917
918 algorithm_params : A dict. The dictionary algorithm_params contains
919 parameters that should be set in the algorithm. Passing
920 algorithm=FISTA(**params) is equivalent to passing
921 algorithm=FISTA() and algorithm_params=params. Default is an empty
922 dictionary.
923
924 penalty_start : Non-negative integer. The number of columns, variables
925 etc., to be exempt from penalisation. Equivalently, the first
926 index to be penalised. Default is 0, all columns are included.
927
928 mean : Boolean. Whether to compute the squared loss or the mean squared
929 loss. Default is True, the mean squared loss.
930
931 Examples
932 --------
933 >>> import numpy as np
934 >>> import parsimony.estimators as estimators
935 >>> import parsimony.algorithms.proximal as proximal
936 >>> import parsimony.functions.nesterov.gl as group_lasso
937 >>> n = 10
938 >>> p = 15
939 >>>
940 >>> np.random.seed(42)
941 >>> X = np.random.rand(n, p)
942 >>> y = np.random.rand(n, 1)
943 >>> l1 = 0.1 # L1 coefficient
944 >>> l2 = 0.9 # Ridge coefficient
945 >>> gl = 1.0 # GL coefficient
946 >>> groups = [range(0, 10), range(5, 15)]
947 >>> A = group_lasso.linear_operator_from_groups(p, groups, weights=None,
948 ... penalty_start=0)
949 >>> lr = estimators.LinearRegressionL1L2GL(l1, l2, gl, A,
950 ... algorithm=proximal.StaticCONESTA(),
951 ... algorithm_params=dict(max_iter=1000),
952 ... mean=False)
953 >>> res = lr.fit(X, y)
954 >>> round(lr.score(X, y), 13)
955 0.6101838224235
956 >>>
957 >>> lr = estimators.LinearRegressionL1L2GL(l1, l2, gl, A,
958 ... algorithm=proximal.CONESTA(),
959 ... algorithm_params=dict(max_iter=1000),
960 ... mean=False)
961 >>> res = lr.fit(X, y)
962 >>> round(lr.score(X, y), 11)
963 0.61139525439
964 >>>
965 >>> lr = estimators.LinearRegressionL1L2GL(l1, l2, gl, A,
966 ... algorithm=proximal.FISTA(),
967 ... algorithm_params=dict(max_iter=1000),
968 ... mean=False)
969 >>> lr = lr.fit(X, y)
970 >>> round(lr.score(X, y), 12)
971 10.7465249393
972 >>>
973 >>> lr = estimators.LinearRegressionL1L2GL(l1, l2, gl, A,
974 ... algorithm=proximal.ISTA(),
975 ... algorithm_params=dict(max_iter=1000),
976 ... mean=False)
977 >>> lr = lr.fit(X, y)
978 >>> round(lr.score(X, y), 14)
979 11.02462114246791
980 """
981 def __init__(self, l1, l2, gl,
982 A=None, mu=consts.TOLERANCE,
983 algorithm=None, algorithm_params=dict(),
984 penalty_start=0,
985 mean=True):
1013
1015 """Return a dictionary containing all the estimator's parameters.
1016 """
1017 return {"l1": self.l1, "l2": self.l2, "gl": self.gl,
1018 "A": self.A, "mu": self.mu,
1019 "penalty_start": self.penalty_start,
1020 "mean": self.mean}
1021
1022 def fit(self, X, y, beta=None):
1023 """Fit the estimator to the data
1024 """
1025 X, y = check_arrays(X, y)
1026
1027 function = functions.LinearRegressionL1L2GL(X, y,
1028 self.l1, self.l2, self.gl,
1029 A=self.A,
1030 penalty_start=self.penalty_start,
1031 mean=self.mean)
1032 self.algorithm.check_compatibility(function,
1033 self.algorithm.INTERFACES)
1034
1035
1036 if beta is None:
1037 beta = self.start_vector.get_vector(X.shape[1])
1038
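# If no smoothing constant was supplied, estimate the Nesterov smoothing
# parameter mu from the start vector.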
1039 if self.mu is None:
1040 self.mu = function.estimate_mu(beta)
1041
1042 function.set_params(mu=self.mu)
1043 self.beta = self.algorithm.run(function, beta)
1044
1045 return self
1046
1048 """Return the (mean) squared error of the estimator.
1049 """
1050 X, y = check_arrays(X, y)
1051 n, p = X.shape
1052 y_hat = np.dot(X, self.beta)
1053 err = np.sum((y_hat - y) ** 2.0)
1054 if self.mean:
1055 err /= float(n)
1056
1057 return err
1058
1186 class LogisticRegression(LogisticRegressionEstimator):
1187 """Logistic regression (re-weighted log-likelihood aka. cross-entropy):
1188
1189 f(beta) = -loglik / n_samples
1190
1191 where
1192
1193 loglik = Sum wi * (yi * log(pi) + (1 − yi) * log(1 − pi)),
1194
1195 pi = p(y=1|xi, beta) = 1 / (1 + exp(-xi'*beta)),
1196
1197 wi = weight of sample i.
1198
1199 Parameters
1200 ----------
1201 algorithm : ExplicitAlgorithm. The algorithm that should be applied.
1202 Should be one of:
1203 1. GradientDescent(...)
1204
1205 Default is GradientDescent(...).
1206
1207 algorithm_params : A dict. The dictionary algorithm_params contains
1208 parameters that should be set in the algorithm. Passing
1209 algorithm=MyAlgorithm(**params) is equivalent to passing
1210 algorithm=MyAlgorithm() and algorithm_params=params. Default is
1211 an empty dictionary.
1212
1213 class_weight : Dict, 'auto' or None. If 'auto', class weights will be
1214 given inverse proportional to the frequency of the class in
1215 the data. If a dictionary is given, keys are classes and values
1216 are corresponding class weights. If None is given, the class
1217 weights will be uniform.
1218
1219 penalty_start : Non-negative integer. The number of columns, variables
1220 etc., to be exempt from penalisation. Equivalently, the first
1221 index to be penalised. Default is 0, all columns are included.
1222
1223 mean : Boolean. Whether to compute the mean loss or not. Default is True,
1224 the mean loss.
1225
1226 Examples
1227 --------
1228 >>> import numpy as np
1229 >>> import parsimony.estimators as estimators
1230 >>> import parsimony.algorithms.gradient as gradient
1231 >>> import parsimony.functions.nesterov.tv as total_variation
1232 >>> shape = (1, 4, 4)
1233 >>> n = 10
1234 >>> p = shape[0] * shape[1] * shape[2]
1235 >>>
1236 >>> np.random.seed(42)
1237 >>> X = np.random.rand(n, p)
1238 >>> y = np.random.randint(0, 2, (n, 1))
1239 >>> lr = estimators.LogisticRegression(
1240 ... algorithm=gradient.GradientDescent(max_iter=1000),
1241 ... mean=False)
1242 >>> res = lr.fit(X, y)
1243 >>> error = lr.score(X, y)
1244 >>> print "error = ", error
1245 error = 1.0
1246 """
1247 def __init__(self, algorithm=None, algorithm_params=dict(),
1248 class_weight=None,
1249 penalty_start=0,
1250 mean=True):
1251
1252 if algorithm is None:
1253 algorithm = gradient.GradientDescent(**algorithm_params)
1254 else:
1255 algorithm.set_params(**algorithm_params)
1256
1257 super(LogisticRegression, self).__init__(algorithm=algorithm,
1258 class_weight=class_weight)
1259
1260 self.penalty_start = int(penalty_start)
1261 self.mean = bool(mean)
1262
1264 """Return a dictionary containing all the estimator's parameters.
1265 """
1266 return {"class_weight": self.class_weight,
1267 "penalty_start": self.penalty_start, "mean": self.mean}
1268
1269 def fit(self, X, y, beta=None, sample_weight=None):
1270 """Fit the estimator to the data.
1271 """
1272 X, y = check_arrays(X, check_labels(y))
1273 if sample_weight is None:
1274 sample_weight = class_weight_to_sample_weight(self.class_weight, y)
1275 y, sample_weight = check_arrays(y, sample_weight)
1276
1277
1278 function = losses.LogisticRegression(X, y,
1279 weights=sample_weight,
1280 mean=self.mean)
1281
1282 self.algorithm.check_compatibility(function,
1283 self.algorithm.INTERFACES)
1284
1285
1286 if beta is None:
1287 beta = self.start_vector.get_vector(X.shape[1])
1288
1289 self.beta = self.algorithm.run(function, beta)
1290
1291 return self
1292
1295 """Logistic regression (re-weighted log-likelihood aka. cross-entropy) with
1296 an L2 penalty:
1297
1298 f(beta) = -loglik / n_samples + (l / 2) * ||beta||²_2,
1299
1300 where
1301
1302 loglik = Sum wi * (yi * log(pi) + (1 − yi) * log(1 − pi)),
1303
1304 pi = p(y=1|xi, beta) = 1 / (1 + exp(-xi'*beta)),
1305
1306 wi = weight of sample i,
1307
1308 and ||.||²_2 is the squared L2-norm.
1309
1310 Parameters
1311 ----------
1312 l : Non-negative float. The L2 regularisation parameter.
1313
1314 algorithm : ExplicitAlgorithm. The algorithm that should be applied.
1315 Should be one of:
1316 1. GradientDescent(...)
1317
1318 Default is GradientDescent(...).
1319
1320 algorithm_params : A dict. The dictionary algorithm_params contains
1321 parameters that should be set in the algorithm. Passing
1322 algorithm=MyAlgorithm(**params) is equivalent to passing
1323 algorithm=MyAlgorithm() and algorithm_params=params. Default is
1324 an empty dictionary.
1325
1326 class_weight : Dict, 'auto' or None. If 'auto', class weights will be
1327 given inverse proportional to the frequency of the class in
1328 the data. If a dictionary is given, keys are classes and values
1329 are corresponding class weights. If None is given, the class
1330 weights will be uniform.
1331
1332 penalty_start : Non-negative integer. The number of columns, variables
1333 etc., to be exempt from penalisation. Equivalently, the first
1334 index to be penalised. Default is 0, all columns are included.
1335
1336 mean : Boolean. Whether to compute the mean loss or not. Default is True,
1337 the mean loss.
1338
1339 Examples
1340 --------
1341 >>> import numpy as np
1342 >>> import parsimony.estimators as estimators
1343 >>> import parsimony.algorithms.gradient as gradient
1344 >>> n, p = 10, 16
1345 >>>
1346 >>> np.random.seed(1337)
1347 >>> X = np.random.rand(n, p)
1348 >>> y = np.random.randint(0, 2, (n, 1))
1349 >>> l = 1.0
1350 >>> lr = estimators.RidgeLogisticRegression(l,
1351 ... algorithm=gradient.GradientDescent(max_iter=1000),
1352 ... mean=False)
1353 >>> res = lr.fit(X, y)
1354 >>> pred = lr.score(X, y)
1355 >>> print "prediction rate = %.1f" % pred
1356 prediction rate = 0.9
1357 """
1358 def __init__(self, l,
1359 algorithm=None, algorithm_params=dict(),
1360 class_weight=None,
1361 penalty_start=0,
1362 mean=True):
1363
1364 self.l = max(0.0, float(l))
1365
1366 if algorithm is None:
1367 algorithm = gradient.GradientDescent(**algorithm_params)
1368 else:
1369 algorithm.set_params(**algorithm_params)
1370
1371 super(RidgeLogisticRegression, self).__init__(algorithm=algorithm,
1372 class_weight=class_weight)
1373
1374 self.penalty_start = max(0, int(penalty_start))
1375 self.mean = bool(mean)
1376
1378 """Return a dictionary containing all the estimator's parameters.
1379 """
1380 return {"class_weight": self.class_weight,
1381 "penalty_start": self.penalty_start,
1382 "mean": self.mean}
1383
1384 def fit(self, X, y, beta=None, sample_weight=None):
1385 """Fit the estimator to the data.
1386 """
1387 X, y = check_arrays(X, check_labels(y))
1388 if sample_weight is None:
1389 sample_weight = class_weight_to_sample_weight(self.class_weight, y)
1390 y, sample_weight = check_arrays(y, sample_weight)
1391
1392
1393 function = losses.RidgeLogisticRegression(X, y, self.l,
1394 weights=sample_weight,
1395 penalty_start=self.penalty_start,
1396 mean=self.mean)
1397
1398
1399
1400
1401
1402
1403 self.algorithm.check_compatibility(function,
1404 self.algorithm.INTERFACES)
1405
1406
1407 if beta is None:
1408 beta = self.start_vector.get_vector(X.shape[1])
1409
1410 self.beta = self.algorithm.run(function, beta)
1411
1412 return self
1413
1416 """Logistic regression (re-weighted log-likelihood aka. cross-entropy) with
1417 with L1 and L2 penalties:
1418
1419 f(beta) = -loglik / n_samples
1420 + alpha * l * ||beta||_1
1421 + alpha * ((1.0 - l) / 2) * ||beta||²_2,
1422
1423 where
1424
1425 loglik = Sum wi * (yi * log(pi) + (1 − yi) * log(1 − pi)),
1426
1427 pi = p(y=1|xi, beta) = 1 / (1 + exp(-xi'*beta)),
1428
1429 wi = weight of sample i,
1430
1431 and ||.||²_2 is the squared L2-norm.
1432
1433 Parameters
1434 ----------
1435 l : Non-negative float. The elastic net mixing parameter: the L1 penalty
is weighted by alpha * l and the squared L2 penalty by alpha * (1 - l) / 2.

alpha : Non-negative float. Scaling parameter of the regularisation.
Default is 1.
1436
1437 algorithm : ExplicitAlgorithm. The algorithm that should be applied.
1438 Should be one of:
1439 1. GradientDescent(...)
1440
1441 Default is GradientDescent(...).
1442
1443 algorithm_params : A dict. The dictionary algorithm_params contains
1444 parameters that should be set in the algorithm. Passing
1445 algorithm=MyAlgorithm(**params) is equivalent to passing
1446 algorithm=MyAlgorithm() and algorithm_params=params. Default is
1447 an empty dictionary.
1448
1449 class_weight : Dict, 'auto' or None. If 'auto', class weights will be
1450 given inverse proportional to the frequency of the class in
1451 the data. If a dictionary is given, keys are classes and values
1452 are corresponding class weights. If None is given, the class
1453 weights will be uniform.
1454
1455 penalty_start : Non-negative integer. The number of columns, variables
1456 etc., to be exempt from penalisation. Equivalently, the first
1457 index to be penalised. Default is 0, all columns are included.
1458
1459 mean : Boolean. Whether to compute the mean loss or not. Default is True,
1460 the mean loss.
1461
1462 Examples
1463 --------
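A minimal usage sketch; no outputs are asserted here, since the fitted
values depend on the FISTA run:

>>> import numpy as np
>>> import parsimony.estimators as estimators
>>> n, p = 10, 16
>>> np.random.seed(42)
>>> X = np.random.rand(n, p)
>>> y = np.random.randint(0, 2, (n, 1))
>>> enlr = estimators.ElasticNetLogisticRegression(l=0.5, alpha=1.0,
...                                                mean=False)
>>> res = enlr.fit(X, y)
>>> rate = enlr.score(X, y)  # Rate of correct classification.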
1464 """
1465 def __init__(self, l, alpha=1.0,
1466 algorithm=None, algorithm_params=dict(),
1467 class_weight=None,
1468 penalty_start=0,
1469 mean=True):
1470
1471 self.l = max(0.0, float(l))
1472 self.alpha = float(alpha)
1473
1474 if algorithm is None:
1475 algorithm = proximal.FISTA(**algorithm_params)
1476 else:
1477 algorithm.set_params(**algorithm_params)
1478
1479 super(ElasticNetLogisticRegression, self).__init__(algorithm=algorithm,
1480 class_weight=class_weight)
1481
1482 self.penalty_start = max(0, int(penalty_start))
1483 self.mean = bool(mean)
1484
1486 """Return a dictionary containing all the estimator's parameters.
1487 """
1488 return {"class_weight": self.class_weight,
1489 "penalty_start": self.penalty_start,
1490 "mean": self.mean}
1491
1492 def fit(self, X, y, beta=None, sample_weight=None):
1493 """Fit the estimator to the data.
1494 """
1495 X, y = check_arrays(X, check_labels(y))
1496 if sample_weight is None:
1497 sample_weight = class_weight_to_sample_weight(self.class_weight, y)
1498 y, sample_weight = check_arrays(y, sample_weight)
1499
1500 function = functions.CombinedFunction()
1501 function.add_loss(losses.LogisticRegression(X, y, mean=self.mean))
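# The smooth squared L2 term is added as an ordinary penalty, while the
# non-smooth L1 term is handled through its proximal operator.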
1502 function.add_penalty(penalties.L2Squared(l=self.alpha * (1.0 - self.l),
1503 penalty_start=self.penalty_start))
1504 function.add_prox(penalties.L1(l=self.alpha * self.l,
1505 penalty_start=self.penalty_start))
1506
1507 self.algorithm.check_compatibility(function,
1508 self.algorithm.INTERFACES)
1509
1510
1511 if beta is None:
1512 beta = self.start_vector.get_vector(X.shape[1])
1513
1514 self.beta = self.algorithm.run(function, beta)
1515
1516 return self
1517
1520 """Logistic regression (re-weighted log-likelihood aka. cross-entropy)
1521 with L1, L2 and TV penalties:
1522
1523 f(beta) = -loglik / n_samples
1524 + l1 * ||beta||_1
1525 + (l2 / 2) * ||beta||²_2
1526 + tv * TV(beta)
1527 where
1528 loglik = Sum wi * (yi * log(pi) + (1 − yi) * log(1 − pi)),
1529
1530 pi = p(y=1|xi, beta) = 1 / (1 + exp(-xi'*beta)),
1531
1532 wi = weight of sample i.
1533
1534 Parameters
1535 ----------
1536 l1 : Non-negative float. The Lagrange multiplier, or regularisation
1537 constant, for the L1 penalty.
1538
1539 l2 : Non-negative float. The Lagrange multiplier, or regularisation
1540 constant, for the ridge (L2) penalty.
1541
1542 tv : Non-negative float. The Lagrange multiplier, or regularisation
1543 constant, of the TV function.
1544
1545 A : Numpy or (usually) scipy.sparse array. The linear operator for the
1546 smoothed total variation Nesterov function. A must be given.
1547
1548 mu : Non-negative float. The regularisation constant for the smoothing.
1549
1550 algorithm : ExplicitAlgorithm. The algorithm that should be applied.
1551 Should be one of:
1552 1. CONESTA(...)
1553 2. StaticCONESTA(...)
1554 3. FISTA(...)
1555 4. ISTA(...)
1556
1557 Default is CONESTA(...).
1558
1559 algorithm_params : A dict. The dictionary algorithm_params contains
1560 parameters that should be set in the algorithm. Passing
1561 algorithm=CONESTA(**params) is equivalent to passing
1562 algorithm=CONESTA() and algorithm_params=params. Default is an
1563 empty dictionary.
1564
1565 class_weight : Dict, 'auto' or None. If 'auto', class weights will be
1566 given inverse proportional to the frequency of the class in
1567 the data. If a dictionary is given, keys are classes and values
1568 are corresponding class weights. If None is given, the class
1569 weights will be uniform.
1570
1571 penalty_start : Non-negative integer. The number of columns, variables
1572 etc., to be exempt from penalisation. Equivalently, the first
1573 index to be penalised. Default is 0, all columns are included.
1574
1575 mean : Boolean. Whether to compute the mean loss or not. Default is True,
1576 the mean loss.
1577
1578 Examples
1579 --------
1580 >>> import numpy as np
1581 >>> import parsimony.estimators as estimators
1582 >>> import parsimony.algorithms.proximal as proximal
1583 >>> import parsimony.functions.nesterov.tv as total_variation
1584 >>> shape = (1, 4, 4)
1585 >>> n = 10
1586 >>> p = shape[0] * shape[1] * shape[2]
1587 >>>
1588 >>> np.random.seed(42)
1589 >>> X = np.random.rand(n, p)
1590 >>> y = np.random.randint(0, 2, (n, 1))
1591 >>> l1 = 0.1 # L1 coefficient
1592 >>> l2 = 0.9 # Ridge coefficient
1593 >>> tv = 1.0 # TV coefficient
1594 >>> A, n_compacts = total_variation.linear_operator_from_shape(shape)
1595 >>> lr = estimators.LogisticRegressionL1L2TV(l1, l2, tv, A,
1596 ... algorithm=proximal.StaticCONESTA(max_iter=1000),
1597 ... mean=False)
1598 >>> res = lr.fit(X, y)
1599 >>> error = lr.score(X, y)
1600 >>> print "error = ", error
1601 error = 0.7
1602 >>> lr = estimators.LogisticRegressionL1L2TV(l1, l2, tv, A,
1603 ... algorithm=proximal.FISTA(max_iter=1000),
1604 ... mean=False)
1605 >>> lr = lr.fit(X, y)
1606 >>> error = lr.score(X, y)
1607 >>> print "error = ", error
1608 error = 0.7
1609 >>> lr = estimators.LogisticRegressionL1L2TV(l1, l2, tv, A,
1610 ... algorithm=proximal.ISTA(max_iter=1000),
1611 ... mean=False)
1612 >>> lr = lr.fit(X, y)
1613 >>> error = lr.score(X, y)
1614 >>> print "error = ", error
1615 error = 0.7
1616 """
1617 def __init__(self, l1, l2, tv,
1618 A=None, mu=consts.TOLERANCE,
1619 algorithm=None, algorithm_params=dict(),
1620 class_weight=None,
1621 penalty_start=0,
1622 mean=True):
1651
1653 """Return a dictionary containing all the estimator's parameters.
1654 """
1655 return {"l1": self.l1, "l2": self.l2, "tv": self.tv,
1656 "A": self.A, "mu": self.mu, "class_weight": self.class_weight,
1657 "penalty_start": self.penalty_start, "mean": self.mean}
1658
1659 def fit(self, X, y, beta=None, sample_weight=None):
1660 """Fit the estimator to the data.
1661 """
1662 X, y = check_arrays(X, check_labels(y))
1663 if sample_weight is None:
1664 sample_weight = class_weight_to_sample_weight(self.class_weight, y)
1665 y, sample_weight = check_arrays(y, sample_weight)
1666
1667
1668 function = functions.LogisticRegressionL1L2TV(X, y,
1669 self.l1, self.l2, self.tv,
1670 A=self.A,
1671 weights=sample_weight,
1672 penalty_start=self.penalty_start,
1673 mean=self.mean)
1674
1675 self.algorithm.check_compatibility(function,
1676 self.algorithm.INTERFACES)
1677
1678
1679 if beta is None:
1680 beta = self.start_vector.get_vector(X.shape[1])
1681
1682 if self.mu is None:
1683 self.mu = function.estimate_mu(beta)
1684 else:
1685 self.mu = float(self.mu)
1686
1687 function.set_params(mu=self.mu)
1688 self.beta = self.algorithm.run(function, beta)
1689
1690 return self
1691
1694 """Logistic regression (re-weighted log-likelihood aka. cross-entropy)
1695 with L1, L2 and Group Lasso penalties:
1696
1697 f(beta) = -loglik / n_samples
1698 + l1 * ||beta||_1
1699 + (l2 / (2 * n)) * ||beta||²_2
1700 + gl * GL(beta)
1701 where
1702 loglik = Sum wi * (yi * log(pi) + (1 − yi) * log(1 − pi)),
1703
1704 pi = p(y=1|xi, beta) = 1 / (1 + exp(-xi'*beta)),
1705
1706 wi = weight of sample i.
1707
1708 Parameters
1709 ----------
1710 l1 : Non-negative float. The Lagrange multiplier, or regularisation
1711 constant, for the L1 penalty.
1712
1713 l2 : Non-negative float. The Lagrange multiplier, or regularisation
1714 constant, for the ridge (L2) penalty.
1715
1716 gl : Non-negative float. The Lagrange multiplier, or regularisation
1717 constant, of the group lasso function.
1718
1719 A : Numpy or (usually) scipy.sparse array. The linear operator for the
1720 smoothed group lasso Nesterov function. A must be given.
1721
1722 mu : Non-negative float. The regularisation constant for the Nesterov
1723 smoothing.
1724
1725 algorithm : ExplicitAlgorithm. The algorithm that should be applied.
1726 Should be one of:
1727 1. FISTA(...)
1728 2. ISTA(...)
1729 3. StaticCONESTA(...)
1730
1731 Default is FISTA(...).
1732
1733 algorithm_params : A dict. The dictionary algorithm_params contains
1734 parameters that should be set in the algorithm. Passing
1735 algorithm=FISTA(**params) is equivalent to passing
1736 algorithm=FISTA() and algorithm_params=params. Default is an empty
1737 dictionary.
1738
1739 class_weight : Dict, 'auto' or None. If 'auto', class weights will be
1740 given inverse proportional to the frequency of the class in
1741 the data. If a dictionary is given, keys are classes and values
1742 are corresponding class weights. If None is given, the class
1743 weights will be uniform.
1744
1745 penalty_start : Non-negative integer. The number of columns, variables
1746 etc., to be exempt from penalisation. Equivalently, the first
1747 index to be penalised. Default is 0, all columns are included.
1748
1749 mean : Boolean. Whether to compute the mean loss or not. Default is True,
1750 the mean loss.
1751
1752 Examples
1753 --------
1754 >>> import numpy as np
1755 >>> import parsimony.estimators as estimators
1756 >>> import parsimony.algorithms.proximal as proximal
1757 >>> import parsimony.functions.nesterov.gl as group_lasso
1758 >>>
1759 >>> np.random.seed(42)
1760 >>>
1761 >>> n, p = 10, 16
1762 >>> groups = [range(0, p / 2), range(p / 2, p)]
1763 >>> weights = [1.5, 0.5]
1764 >>> A = group_lasso.linear_operator_from_groups(p, groups=groups,
1765 ... weights=weights)
1766 >>>
1767 >>> X = np.random.rand(n, p)
1768 >>> y = np.random.randint(0, 2, (n, 1))
1769 >>> l1 = 0.1 # L1 coefficient
1770 >>> l2 = 0.9 # Ridge coefficient
1771 >>> gl = 1.0 # GL coefficient
1772 >>> lr = estimators.LogisticRegressionL1L2GL(l1, l2, gl, A=A,
1773 ... algorithm=proximal.StaticCONESTA(max_iter=1000),
1774 ... mean=False)
1775 >>> res = lr.fit(X, y)
1776 >>> error = lr.score(X, y)
1777 >>> print "error = ", error
1778 error = 0.7
1779 >>> lr = estimators.LogisticRegressionL1L2GL(l1, l2, gl, A=A,
1780 ... algorithm=proximal.FISTA(max_iter=1000),
1781 ... mean=False)
1782 >>> lr = lr.fit(X, y)
1783 >>> error = lr.score(X, y)
1784 >>> print "error = ", error
1785 error = 0.7
1786 >>> lr = estimators.LogisticRegressionL1L2GL(l1, l2, gl, A,
1787 ... algorithm=proximal.ISTA(max_iter=1000),
1788 ... mean=False)
1789 >>> lr = lr.fit(X, y)
1790 >>> error = lr.score(X, y)
1791 >>> print "error = ", error
1792 error = 0.7
1793 """
1794 def __init__(self, l1, l2, gl,
1795 A=None, mu=consts.TOLERANCE,
1796 weigths=None,
1797 algorithm=None, algorithm_params=dict(),
1798 class_weight=None,
1799 penalty_start=0,
1800 mean=True):
1833
1835 """Returns a dictionary containing all the estimator's parameters.
1836 """
1837 return {"l1": self.l1, "l2": self.l2, "gl": self.gl,
1838 "A": self.A, "mu": self.mu, "class_weight": self.class_weight,
1839 "penalty_start": self.penalty_start, "mean": self.mean}
1840
1841 def fit(self, X, y, beta=None, sample_weight=None):
1842 """Fit the estimator to the data.
1843 """
1844 X, y = check_arrays(X, check_labels(y))
1845 if sample_weight is None:
1846 sample_weight = class_weight_to_sample_weight(self.class_weight, y)
1847 y, sample_weight = check_arrays(y, sample_weight)
1848 function = functions.LogisticRegressionL1L2GL(X, y,
1849 self.l1, self.l2, self.gl,
1850 A=self.A,
1851 weights=sample_weight,
1852 penalty_start=self.penalty_start,
1853 mean=self.mean)
1854
1855 self.algorithm.check_compatibility(function,
1856 self.algorithm.INTERFACES)
1857
1858
1859 if beta is None:
1860 beta = self.start_vector.get_vector(X.shape[1])
1861
1862 if self.mu is None:
1863 self.mu = function.estimate_mu(beta)
1864 else:
1865 self.mu = float(self.mu)
1866
1867 function.set_params(mu=self.mu)
1868 self.beta = self.algorithm.run(function, beta)
1869
1870 return self
1871
1874 """Linear regression with L2 and simultaneously smoothed L1 and TV
1875 penalties:
1876
1877 f(beta, X, y) = (1 / (2 * n)) * ||Xbeta - y||²_2
1878 + (l2 / 2) * ||beta||²_2
1879 + L1TV(beta),
1880
1881 where L1TV is l1 * L1(beta) + tv * TV(beta) smoothed together by Nesterov's
1882 smoothing.
1883
1884 Parameters
1885 ----------
1886 l2 : Non-negative float. The Lagrange multiplier, or regularisation
1887 constant, for the ridge (L2) penalty.
1888
1889 l1 : Non-negative float. The Lagrange multiplier, or regularisation
1890 constant, for the L1 penalty.
1891
1892 tv : Non-negative float. The Lagrange multiplier, or regularisation
1893 constant, of the TV function.
1894
1895 A : A list or tuple with 4 elements of (usually sparse) arrays. The linear
1896 operator for the smoothed L1+TV. The first element must be the
1897 linear operator for L1 and the following three for TV. May not be
1898 None.
1899
1900 algorithm : ExplicitAlgorithm. The algorithm that should be applied.
1901 Should be one of:
1902 1. ExcessiveGapMethod(...)
1903
1904 Default is ExcessiveGapMethod(...).
1905
1906 algorithm_params : A dict. The dictionary algorithm_params contains
1907 parameters that should be set in the algorithm. Passing
1908 algorithm=ExcessiveGapMethod(**params) is equivalent to passing
1909 algorithm=ExcessiveGapMethod() and algorithm_params=params. Default
1910 is an empty dictionary.
1911
1912 penalty_start : Non-negative integer. The number of columns, variables
1913 etc., to be exempt from penalisation. Equivalently, the first
1914 index to be penalised. Default is 0, all columns are included.
1915
1916 mean : Boolean. Whether to compute the squared loss or the mean squared
1917 loss. Default is True, the mean squared loss.
1918
1919 Examples
1920 --------
1921 >>> import numpy as np
1922 >>> import parsimony.estimators as estimators
1923 >>> import parsimony.algorithms.primaldual as primaldual
1924 >>> import parsimony.functions.nesterov.l1tv as l1tv
1925 >>> shape = (1, 4, 4)
1926 >>> n = 10
1927 >>> p = shape[0] * shape[1] * shape[2]
1928 >>>
1929 >>> np.random.seed(42)
1930 >>> X = np.random.rand(n, p)
1931 >>> y = np.random.rand(n, 1)
1932 >>> l1 = 0.1 # L1 coefficient
1933 >>> l2 = 0.9 # Ridge coefficient
1934 >>> tv = 1.0 # TV coefficient
1935 >>> A = l1tv.linear_operator_from_shape(shape, p, penalty_start=0)
1936 >>> lr = estimators.LinearRegressionL2SmoothedL1TV(l2, l1, tv, A,
1937 ... algorithm=primaldual.ExcessiveGapMethod(),
1938 ... algorithm_params=dict(max_iter=1000),
1939 ... mean=False)
1940 >>> res = lr.fit(X, y)
1941 >>> error = lr.score(X, y)
1942 >>> round(error, 14)
1943 0.06837304969156
1944 """
1945 def __init__(self, l2, l1, tv,
1946 A=None,
1947 algorithm=None, algorithm_params=dict(),
1948 penalty_start=0,
1949 mean=True):
1950
1951 if algorithm is None:
1952 algorithm = primaldual.ExcessiveGapMethod(**algorithm_params)
1953 else:
1954 algorithm.set_params(**algorithm_params)
1955
1956 super(LinearRegressionL2SmoothedL1TV, self).__init__(
1957 algorithm=algorithm)
1958
1959 self.l2 = max(0.0, float(l2))
1960 self.l1 = max(0.0, float(l1))
1961 self.tv = max(0.0, float(tv))
1962
1963 if self.l2 < consts.TOLERANCE:
1964 warnings.warn("The ridge parameter should be non-zero.")
1965
1966 if A is None:
1967 raise TypeError("The A matrix may not be None.")
1968 self.A = A
1969
1970 self.penalty_start = max(0, int(penalty_start))
1971 self.mean = bool(mean)
1972
1974 """Returns a dictionary containing all the estimator's parameters.
1975 """
1976 return {"l1": self.l1, "l2": self.l2, "tv": self.tv, "A": self.A,
1977 "penalty_start": self.penalty_start, "mean": self.mean}
1978
1979 def fit(self, X, y, beta=None):
1980 """Fit the estimator to the data.
1981 """
1982 X, y = check_arrays(X, y)
1983 function = functions.LinearRegressionL2SmoothedL1TV(X, y,
1984 self.l2, self.l1, self.tv,
1985 A=self.A,
1986 penalty_start=self.penalty_start,
1987 mean=self.mean)
1988
1989 self.algorithm.check_compatibility(function,
1990 self.algorithm.INTERFACES)
1991
1992 self.beta = self.algorithm.run(function, beta)
1993
1994 return self
1995
1997 """Return the mean squared error of the estimator.
1998 """
1999 n, p = X.shape
2000 y_hat = np.dot(X, self.beta)
2001
2002 return np.sum((y_hat - y) ** 2.0) / float(n)
2003
2006 """Estimator for PLS regression
2007
2008 f(beta, X, Y) = -Cov(X.beta, Y),
2009
2010 where Cov(., .) is the covariance.
2011
2012 Parameters
2013 ----------
2014 K : Positive integer. The number of components to compute.
2015
2016 algorithm : OR(ImplicitAlgorithm, ExplicitAlgorithm). The algorithm that
2017 should be used. Should be one of:
2018 1. PLSR()
2019 2. MultiblockFISTA(...)
2020
2021 Default is PLSR(...).
2022
2023 algorithm_params : A dict. The dictionary algorithm_params contains
2024 parameters that should be set in the algorithm. Passing
2025 algorithm=MyAlgorithm(**params) is equivalent to passing
2026 algorithm=MyAlgorithm() and algorithm_params=params. Default
2027 is an empty dictionary.
2028
2029 start_vector : BaseStartVector. Generates the start vector that will be
2030 used.
2031
2032 mean : Boolean. Whether or not to compute the mean squared error or the
2033 squared error. Default is True, compute the mean squared error.
2034
2035 Examples
2036 --------
2037 >>> import parsimony.estimators as estimators
2038 >>> import parsimony.algorithms.nipals as nipals
2039 >>> import parsimony.algorithms.multiblock as multiblock
2040 >>> import numpy as np
2041 >>> np.random.seed(42)
2042 >>>
2043 >>> n, p = 16, 10
2044 >>> X = np.random.rand(n, p)
2045 >>> y = np.random.rand(n, 1)
2046 >>> plsr = estimators.PLSRegression(K=4, algorithm=nipals.PLSR(),
2047 ... algorithm_params=dict(max_iter=100))
2048 >>> error = plsr.fit(X, y).score(X, y)
2049 >>> print "error = ", error
2050 error = 0.0222345224457
2051 """
2067
2068 self.K = max(1, int(K))
2069
2070 if algorithm is None:
2071 algorithm = nipals.PLSR(**algorithm_params)
2072 else:
2073 algorithm.set_params(**algorithm_params)
2074
2075 super(PLSRegression, self).__init__(algorithm=algorithm,
2076 start_vector=start_vector)
2077
2078 self.unbiased = bool(unbiased)
2079 self.mean = bool(mean)
2080
2082 """Return a dictionary containing the estimator's parameters
2083 """
2084 return {"unbiased": self.unbiased}
2085
2086 def fit(self, X, Y, w=None):
2087 """Fit the estimator to the data.
2088 """
2089 X, Y = check_arrays(X, Y)
2090
2091 n, p = X.shape
2092 _, q = Y.shape
2093
2094 rankone = deflation.RankOneDeflation()
2095
2096 self.W = np.zeros((p, self.K))
2097 self.T = np.zeros((n, self.K))
2098 self.C = np.zeros((q, self.K))
2099 self.U = np.zeros((n, self.K))
2100 self.P = np.zeros((p, self.K))
2101 for k in range(self.K):
2102
2103 if isinstance(self.algorithm, bases.ExplicitAlgorithm):
2104 cov1 = mb_losses.LatentVariableCovariance([X, Y],
2105 unbiased=self.unbiased)
2106 cov2 = mb_losses.LatentVariableCovariance([Y, X],
2107 unbiased=self.unbiased)
2108
2109 l21 = penalties.L2(c=1.0)
2110 l22 = penalties.L2(c=1.0)
2111
2112 function = mb_losses.CombinedMultiblockFunction([X, Y])
2113 function.add_function(cov1, 0, 1)
2114 function.add_function(cov2, 1, 0)
2115
2116 function.add_constraint(l21, 0)
2117 function.add_constraint(l22, 1)
2118
2119 self.algorithm.check_compatibility(function,
2120 self.algorithm.INTERFACES)
2121
2122
2123
2124
2125 w = [self.start_vector.get_vector(X.shape[1]),
2126 self.start_vector.get_vector(Y.shape[1])]
2127
2128 print "max iter:", self.algorithm.max_iter
2129 w = self.algorithm.run(function, w)
2130 c = w[1]
2131 w = w[0]
2132 else:
2133 w, c = self.algorithm.run([X, Y], w if k == 0 else None)
2134
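# Standard PLS updates: X-scores t, Y-weights c, Y-scores u and X-loadings
# p, each normalised by the corresponding squared norm when it is
# numerically non-zero.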
2135 t = np.dot(X, w)
2136
2137 tt = np.dot(t.T, t)[0, 0]
2138 c = np.dot(Y.T, t)
2139 if tt > consts.TOLERANCE:
2140 c *= 1.0 / tt
2141
2142 cc = np.dot(c.T, c)[0, 0]
2143 u = np.dot(Y, c)
2144 if cc > consts.TOLERANCE:
2145 u *= 1.0 / cc
2146
2147 p = np.dot(X.T, t)
2148 if tt > consts.TOLERANCE:
2149 p *= 1.0 / tt
2150
2151 self.W[:, [k]] = w
2152 self.T[:, [k]] = t
2153 self.C[:, [k]] = c
2154 self.U[:, [k]] = u
2155 self.P[:, [k]] = p
2156
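# Deflate X with the rank-one approximation t * p' before extracting the
# next component.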
2157 if k < self.K - 1:
2158 X = rankone.deflate(X, t, p)
2159
2160
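# Rotate the weights to compensate for the deflations, Ws = W * pinv(P' * W),
# so that the regression coefficients become beta = Ws * C'.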
2161 self.Ws = np.dot(self.W, np.linalg.pinv(np.dot(self.P.T, self.W)))
2162
2163 self.beta = np.dot(self.Ws, self.C.T)
2164
2165 return self
2166
2168 """Returns the (mean) squared error of the estimator.
2169 """
2170 X, Y = check_arrays(X, Y)
2171
2172 Yhat = np.dot(X, self.beta)
2173 err = maths.normFro(Yhat - Y) ** 2.0
2174
2175 if self.mean:
2176 n, p = X.shape
2177 err /= float(n)
2178
2179 return err
2180
2183 """Estimator for sparse PLS regression
2184
2185 f(w, c, X, Y) = -Cov(X.w, Y.c) + l[0] * |w|_1 + l[1] * |c|_1,
2186
2187 where Cov(., .) is the covariance and |.|_1 is the L1 norm.
2188
2189 Parameters
2190 ----------
2191 l : List or tuple of two non-negative floats. The Lagrange multipliers, or
2192 regularisation constants, for the X and Y blocks, respectively.
2193
2194 K : Positive integer. The number of components to compute.
2195
2196 algorithm : OR(ImplicitAlgorithm, ExplicitAlgorithm). The algorithm that
2197 should be used. Should be one of:
2198 1. SparsePLSR()
2199 2. MultiblockFISTA(...)
2200
2201 Default is SparsePLSR(...).
2202
2203 algorithm_params : A dict. The dictionary algorithm_params contains
2204 parameters that should be set in the algorithm. Passing
2205 algorithm=MyAlgorithm(**params) is equivalent to passing
2206 algorithm=MyAlgorithm() and algorithm_params=params. Default
2207 is an empty dictionary.
2208
2209 start_vector : BaseStartVector. Generates the start vector that will be
2210 used.
2211
2212 mean : Boolean. Whether or not to compute the mean squared error or the
2213 squared error. Default is True, compute the mean squared error.
2214
2215 Examples
2216 --------
2217 >>> import parsimony.estimators as estimators
2218 >>> import parsimony.algorithms.nipals as nipals
2219 >>> import parsimony.algorithms.multiblock as multiblock
2220 >>> import numpy as np
2221 >>> np.random.seed(42)
2222 >>>
2223 >>> n, p = 16, 10
2224 >>> X = np.random.rand(n, p)
2225 >>> y = np.random.rand(n, 1)
2226 >>> plsr = estimators.SparsePLSRegression(l=[3.0, 0.0], K=1,
2227 ... algorithm=nipals.SparsePLSR(),
2228 ... algorithm_params=dict(max_iter=100))
2229 >>> error = plsr.fit(X, y).score(X, y)
2230 >>> print plsr.W
2231 [[ 0.37053417]
2232 [ 0.54969643]
2233 [ 0.29593809]
2234 [ 0.2937247 ]
2235 [ 0.49989677]
2236 [ 0.0895912 ]
2237 [ 0. ]
2238 [ 0.35883331]
2239 [ 0. ]
2240 [ 0. ]]
2241 >>> print plsr.C
2242 [[ 0.32949094]]
2243 >>> print "error = ", error
2244 error = 0.0547513077301
2245 """
2273
2274 self.l = [max(0.0, float(l[0])),
2275 max(0.0, float(l[1]))]
2276 self.K = max(1, int(K))
2277
2278 if algorithm is None:
2279 algorithm = nipals.SparsePLSR(**algorithm_params)
2280 else:
2281 algorithm.set_params(**algorithm_params)
2282
2283 super(SparsePLSRegression, self).__init__(algorithm=algorithm,
2284 start_vector=start_vector)
2285
2286 self.unbiased = bool(unbiased)
2287 self.mean = bool(mean)
2288
2290 """Return a dictionary containing the estimator's parameters
2291 """
2292 return {"l": self.l, "K": self.K, "unbiased": self.unbiased,
2293 "mean": self.mean}
2294
2295 def fit(self, X, Y, w=None):
2296 """Fit the estimator to the data.
2297 """
2298 X, Y = check_arrays(X, Y)
2299
2300 n, p = X.shape
2301 _, q = Y.shape
2302
2303 rankone = deflation.RankOneDeflation()
2304
2305 self.W = np.zeros((p, self.K))
2306 self.T = np.zeros((n, self.K))
2307 self.C = np.zeros((q, self.K))
2308 self.U = np.zeros((n, self.K))
2309 self.P = np.zeros((p, self.K))
2310 for k in range(self.K):
2311
2312 if isinstance(self.algorithm, bases.ExplicitAlgorithm):
2313 cov1 = mb_losses.LatentVariableCovariance([X, Y],
2314 unbiased=self.unbiased)
2315 cov2 = mb_losses.LatentVariableCovariance([Y, X],
2316 unbiased=self.unbiased)
2317
2318 l1l2_1 = penalties.L1L2Squared(self.l[0], 1.0)
2319 l1l2_2 = penalties.L1L2Squared(self.l[1], 1.0)
2320
2321
2322
2323
2324
2325 function = mb_losses.CombinedMultiblockFunction([X, Y])
2326 function.add_function(cov1, 0, 1)
2327 function.add_function(cov2, 1, 0)
2328
2329 function.add_penalty(l1l2_1, 0)
2330 function.add_penalty(l1l2_2, 1)
2331
2332
2333
2334
2335
2336
2337 self.algorithm.check_compatibility(function,
2338 self.algorithm.INTERFACES)
2339
2340
2341
2342
2343 w = [self.start_vector.get_vector(X.shape[1]),
2344 self.start_vector.get_vector(Y.shape[1])]
2345
2346 print "max iter:", self.algorithm.max_iter
2347 w = self.algorithm.run(function, w)
2348 c = w[1]
2349 w = w[0]
2350 else:
2351 self.algorithm.set_params(l=self.l)
2352 w, c = self.algorithm.run([X, Y], w if k == 0 else None)
2353
2354 t = np.dot(X, w)
2355
2356 tt = np.dot(t.T, t)[0, 0]
2357 c = np.dot(Y.T, t)
2358 if tt > consts.TOLERANCE:
2359 c *= 1.0 / tt
2360
2361 cc = np.dot(c.T, c)[0, 0]
2362 u = np.dot(Y, c)
2363 if cc > consts.TOLERANCE:
2364 u *= 1.0 / cc
2365
2366 p = np.dot(X.T, t)
2367 if tt > consts.TOLERANCE:
2368 p *= 1.0 / tt
2369
2370 self.W[:, [k]] = w
2371 self.T[:, [k]] = t
2372 self.C[:, [k]] = c
2373 self.U[:, [k]] = u
2374 self.P[:, [k]] = p
2375
2376 if k < self.K - 1:
2377 X = rankone.deflate(X, t, p)
2378
2379
2380 self.Ws = np.dot(self.W, np.linalg.pinv(np.dot(self.P.T, self.W)))
2381
2382 self.beta = np.dot(self.Ws, self.C.T)
2383
2384 return self
2385
2387 """Returns the (mean) squared error of the estimator.
2388 """
2389 X, Y = check_arrays(X, Y)
2390
2391 Yhat = np.dot(X, self.beta)
2392 err = maths.normFro(Yhat - Y) ** 2.0
2393
2394 if self.mean:
2395 n, p = X.shape
2396 err /= float(n)
2397
2398 return err
2399
2402 """Estimator for the clustering problem, i.e. for
2403
2404 f(C, mu) = sum_{i=1}^K sum_{x in C_i} |x - mu_i|²,
2405
2406 where C = {C_1, ..., C_K} is a set of sets of points, mu_i is the mean of
2407 C_i and |.|² is the squared Euclidean norm.
2408
2409 This loss function is known as the within-cluster sum of squares.
2410
2411 Parameters
2412 ----------
2413 K : Positive integer. The number of clusters to find.
2414
2415 algorithm : Currently only the K-means algorithm (Lloyd's algorithm). The
2416 algorithm that should be used. Should be one of:
2417 1. KMeans(...)
2418
2419 Default is KMeans(...).
2420
2421 algorithm_params : A dictionary. The dictionary algorithm_params contains
2422 parameters that should be set in the algorithm. Passing
2423 algorithm=MyAlgorithm(**params) is equivalent to passing
2424 algorithm=MyAlgorithm() and algorithm_params=params. Default
2425 is an empty dictionary.
2426
2427 Examples
2428 --------
2429 >>> import parsimony.estimators as estimators
2430 >>> import parsimony.algorithms.cluster as cluster
2431 >>> import numpy as np
2432 >>> np.random.seed(1337)
2433 >>>
2434 >>> K = 3
2435 >>> n, p = 150, 2
2436 >>> X = np.vstack((2 * np.random.rand(n / 3, 2) - 2,
2437 ... 0.5 * np.random.rand(n / 3, 2),
2438 ... np.hstack([0.5 * np.random.rand(n / 3, 1) - 1,
2439 ... 0.5 * np.random.rand(n / 3, 1)])))
2440 >>> lloyds = cluster.KMeans(K, max_iter=100, repeat=10)
2441 >>> KMeans = estimators.Clustering(K, algorithm=lloyds)
2442 >>> error = KMeans.fit(X).score(X)
2443 >>> print error
2444 27.6675491884
2445 >>>
2446 >>> #import matplotlib.pyplot as plot
2447 >>> #mus = KMeans._means
2448 >>> #plot.plot(X[:, 0], X[:, 1], '*')
2449 >>> #plot.plot(mus[:, 0], mus[:, 1], 'rs')
2450 >>> #plot.show()
2451 """
2452 def __init__(self, K, algorithm=None, algorithm_params=dict()):
2453
2454 self.K = max(1, int(K))
2455
2456 if algorithm is None:
2457 algorithm = cluster.KMeans(**algorithm_params)
2458 else:
2459 algorithm.set_params(**algorithm_params)
2460
2461 super(Clustering, self).__init__(algorithm=algorithm)
2462
2464 """Return a dictionary containing the estimator's own input parameters.
2465 """
2466 return {"K": self.K}
2467
2468 def fit(self, X, means=None):
2469 """Fit the estimator to the data.
2470 """
2471 X = check_arrays(X)
2472
2473 self._means = self.algorithm.run(X)
2474
2475 return self
2476
2478 """Perform prediction using the fitted parameters.
2479
2480 Finds the closest cluster centre for each point, i.e. assigns each
2481 point to a cluster.
2482
2483 Returns
2484 -------
2485 closest : A list with one element per sample (row of X): the index of
the cluster that each point was assigned to.
2486 """
2487 X = check_arrays(X)
2488
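# Squared Euclidean distance from each sample to every cluster mean; each
# sample is then assigned to its closest mean.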
2489 dists = np.zeros((X.shape[0], self.K))
2490 i = 0
2491 for mu in self._means:
2492 dist = np.sum((X - mu) ** 2.0, axis=1)
2493 dists[:, i] = dist
2494 i += 1
2495
2496 closest = np.argmin(dists, axis=1).tolist()
2497
2498 return closest
2499
2501 """Returns the estimator's fitted means.
2502 """
2503 return self._means
2504
2506 """Computes the within-cluster sum of squares.
2507 """
2508 X = check_arrays(X)
2509
2510 closest = np.array(self.predict(X))
2511 wcss = 0.0
2512 for i in xrange(self.K):
2513 idx = closest == i
2514 wcss += np.sum((X[idx, :] - self._means[i, :]) ** 2.0)
2515
2516 return wcss
2517
2518
2519 if __name__ == "__main__":
2520 import doctest
2521 doctest.testmod()
2522