Source code for mafese.selector

#!/usr/bin/env python
# Created by "Thieu" at 10:43, 24/05/2023 ----------%                                                                               
#       Email: nguyenthieu2102@gmail.com            %                                                    
#       Github: https://github.com/thieu1995        %                         
# --------------------------------------------------%

from abc import ABC
from mafese.utils import validator
from mafese.utils.estimator import get_general_estimator
from mafese.utils.evaluator import get_metrics, get_all_classification_metrics, get_all_regression_metrics
from mafese.utils.data_loader import Data


[docs]class Selector(ABC): """ Defines an abstract class for Feature Selector. """ name = "Feature Selector" SUPPORTED_PROBLEMS = ["classification", "regression"] SUPPORTED_ESTIMATORS = ["knn", "svm", "rf", "adaboost", "xgb", "tree", "ann"] SUPPORTED_REGRESSION_METRICS = list(get_all_regression_metrics().keys()) SUPPORTED_CLASSIFICATION_METRICS = list(get_all_classification_metrics().keys()) def __init__(self, problem="classification"): self.problem = self._set_problem(problem) self.selector = None self.estimator = None self.paras = {} self.selected_feature_indexes = [] self.selected_feature_masks = [] self.selected_feature_solution = [] self.epsilon = 1e-8 self.w = 1e8 def _set_problem(self, problem): if type(problem) is not str: raise TypeError(f"problem should be string, and is 'classification' or 'regression'.") else: return validator.check_str("problem", problem, self.SUPPORTED_PROBLEMS)
[docs] def fit(self, X, y=None): """Learn the features to select from X. Parameters ---------- X : array-like of shape (n_samples, n_features) Training vectors, where `n_samples` is the number of samples and `n_features` is the number of predictors. y : array-like of shape (n_samples,), default=None Target values. This parameter may be ignored for unsupervised learning. Returns ------- self : object Returns the instance itself. """ return self.selector.fit(X, y)
[docs] def transform(self, X): """Reduce X to the selected features. Parameters ---------- X : array of shape [n_samples, n_features] The input samples. Returns ------- X_r : array of shape [n_samples, n_selected_features] The input samples with only the selected features. """ return self.selector.transform(X)
[docs] def fit_transform(self, X, y=None, **fit_params): """ Fit to data, then transform it. Fits transformer to `X` and `y` with optional parameters `fit_params` and returns a transformed version of `X`. Parameters ---------- X : array-like of shape (n_samples, n_features) Input samples. y : array-like of shape (n_samples,) or (n_samples, n_outputs), \ default=None Target values (None for unsupervised transformations). **fit_params : dict Additional fit parameters. Returns ------- X_new : ndarray array of shape (n_samples, n_features_new) Transformed array. """ return self.selector.fit_transform(X, y, **fit_params)
[docs] def evaluate(self, estimator=None, estimator_paras=None, data=None, metrics=None): """ Evaluate the new dataset. We will re-train the estimator with training set and return the metrics of both training and testing set Parameters ---------- estimator : str or Estimator instance (from scikit-learn or custom) If estimator is str, we are currently support: - knn: k-nearest neighbors - svm: support vector machine - rf: random forest - adaboost: AdaBoost - xgb: Gradient Boosting - tree: Extra Trees - ann: Artificial Neural Network (Multi-Layer Perceptron) If estimator is Estimator instance: you need to make sure that it has `fit` and `predict` methods estimator_paras: None or dict, default = None The parameters of the estimator, please see the official document of scikit-learn to selected estimator. If None, we use the default parameter for selected estimator data : Data, an instance of Data class. It must have training and testing set metrics : tuple, list, default = None Depend on the regression or classification you are trying to tackle. The supported metrics can be found at: https://github.com/thieu1995/permetrics Returns ------- metrics_results: dict. The metrics for both training and testing set. """ if estimator is None: if self.estimator is None: raise ValueError("You need to set estimator to evaluate the data.") est_ = self.estimator elif type(estimator) is str: estimator_name = validator.check_str("estimator", estimator, self.SUPPORTED_ESTIMATORS) est_ = get_general_estimator(self.problem, estimator_name, estimator_paras) elif (hasattr(estimator, 'fit') and hasattr(estimator, 'predict')) and (callable(estimator.fit) and callable(estimator.predict)): est_ = estimator else: raise NotImplementedError(f"Your estimator needs to implement at least 'fit' and 'predict' functions.") if (metrics is None) or (type(metrics) not in (tuple, list)): raise ValueError("You need to pass a tuple/list of performance metrics. See the supported metrics at https://github.com/thieu1995/permetrics") if isinstance(data, Data): X_train = self.transform(data.X_train) X_test = self.transform(data.X_test) est_.fit(X_train, data.y_train) y_train_pred = est_.predict(X_train) y_test_pred = est_.predict(X_test) train_result = get_metrics(self.problem, data.y_train, y_train_pred, metrics=metrics, testcase="train") test_result = get_metrics(self.problem, data.y_test, y_test_pred, metrics=metrics, testcase="test") return {**train_result, **test_result} else: raise ValueError("'data' should be an instance of Data class.")