1
2
3
4
5
6
7
8
9 """Feature selection base classes and related helpers."""
10
11 __docformat__ = 'restructuredtext'
12
13 import numpy as np
14
15 from mvpa.featsel.helpers import FractionTailSelector
16 from mvpa.misc.state import StateVariable, ClassWithCollections
17
18 if __debug__:
19 from mvpa.base import debug
20
22 """Base class for any feature selection
23
24 Base class for Functors which implement feature selection on the
25 datasets.
26 """
27
28 selected_ids = StateVariable(enabled=False)
29
33
34
def __call__(self, dataset, testdataset=None):
    """Run the feature selection on a dataset (abstract).

    :Parameters:
      dataset : Dataset
        dataset used to select features
      testdataset : Dataset
        dataset that might be used to compute a stopping criterion

    Implementations return a tuple holding the dataset reduced to the
    selected features; when a test dataset was given, the tuple also
    holds the test dataset reduced to the same features. Any further
    information about the selection process (e.g. mask, elimination
    step in RFE, etc) has to be exposed by the derived class itself.

    This base implementation performs no selection and always raises
    `NotImplementedError`.
    """
    raise NotImplementedError
51
52
54 """ 'Untrain' feature selection
55
56 Necessary for full 'untraining' of the classifiers. By default
57 does nothing, needs to be overridden in corresponding feature
58 selections to pass to the sensitivities
59 """
60 pass
61
62
64 """Feature elimination.
65
66 A `FeaturewiseDatasetMeasure` is used to compute sensitivity maps given a certain
67 dataset. These sensitivity maps are in turn used to discard unimportant
68 features.
69 """
70
71 sensitivity = StateVariable(enabled=False)
72
78 """Initialize feature selection
79
80 :Parameters:
81 sensitivity_analyzer : FeaturewiseDatasetMeasure
82 sensitivity analyzer to come up with sensitivity
83 feature_selector : Functor
84 Given a sensitivity map it has to return the ids of those
85 features that should be kept.
86
87 """
88
89
90 FeatureSelection.__init__(self, **kwargs)
91
92 self.__sensitivity_analyzer = sensitivity_analyzer
93 """Sensitivity analyzer to use once"""
94
95 self.__feature_selector = feature_selector
96 """Functor which takes care about removing some features."""
97
98
100 if __debug__:
101 debug("FS_", "Untraining sensitivity-based FS: %s" % self)
102 self.__sensitivity_analyzer.untrain()
103
104
105 - def __call__(self, dataset, testdataset=None):
152
153
154 sensitivity_analyzer = property(fget=lambda self:self.__sensitivity_analyzer,
155 doc="Measure which was used to do selection")
156
157
159 """Feature elimination through the list of FeatureSelection's.
160
161 Given a list of FeatureSelections, it applies them in turn.
162 """
163
164 nfeatures = StateVariable(
165 doc="Number of features before each step in pipeline")
166
167
def __init__(self, feature_selections, **kwargs):
    """Set up a pipeline of feature selections.

    :Parameters:
      feature_selections : list of FeatureSelection
        selections to use. Order matters
      kwargs
        forwarded unchanged to `FeatureSelection.__init__`
    """
    # The base class consumes all remaining keyword arguments.
    FeatureSelection.__init__(self, **kwargs)

    # Selectors to use in turn (kept by reference, not copied).
    self.__feature_selections = feature_selections
183
184
186 if __debug__:
187 debug("FS_", "Untraining FS pipeline: %s" % self)
188 for fs in self.__feature_selections:
189 fs.untrain()
190
191
192 - def __call__(self, dataset, testdataset=None, **kwargs):
225
226 feature_selections = property(fget=lambda self:self.__feature_selections,
227 doc="List of `FeatureSelections`")
228
229
230
232 """Meta feature selection utilizing several embedded selection methods.
233
234 Each embedded feature selection method is computed individually. Afterwards
235 all feature sets are combined by either taking the union or intersection of
236 all sets.
237
238 The individual feature sets of all embedded methods are optionally available
239 from the `selections_ids` state variable.
240 """
241 selections_ids = StateVariable(
242 doc="List of feature id sets for each performed method.")
243
def __init__(self, feature_selections, combiner, **kwargs):
    """Remember the embedded selections and how to combine their results.

    :Parameters:
      feature_selections : list
        FeatureSelection instances to run. Order is not important.
      combiner : 'union', 'intersection'
        method used to combine the feature selection sets of all
        computed methods. The value is stored as given; it is not
        validated here.
      kwargs
        forwarded unchanged to `FeatureSelection.__init__`
    """
    FeatureSelection.__init__(self, **kwargs)

    # Both values are only stored here, by reference.
    self.__combiner = combiner
    self.__feature_selections = feature_selections
257
258
260 if __debug__:
261 debug("FS_", "Untraining combined FS: %s" % self)
262 for fs in self.__feature_selections:
263 fs.untrain()
264
265
266 - def __call__(self, dataset, testdataset=None):
315
316
317 feature_selections = property(fget=lambda self:self.__feature_selections,
318 doc="List of `FeatureSelections`")
319 combiner = property(fget=lambda self:self.__combiner,
320 doc="Selection set combination method.")
321