Installation¶
Install the current PyPI release:
$ pip install mafese==0.1.9
Install directly from source code:
$ git clone https://github.com/thieu1995/mafese.git $ cd mafese $ python setup.py install
In case, you want to install the development version from Github:
$ pip install git+https://github.com/thieu1995/mafese
After installation, you can import MAFESE as any other Python module:
$ python
>>> import mafese
>>> mafese.__version__
Lib’s structure¶
Current’s structure:
docs
examples
mafese
data/
cls/
...csv
reg/
...csv
wrapper/
mha.py
recursive.py
sequential.py
embedded/
lasso.py
tree.py
filter.py
unsupervised.py
utils/
correlation.py
data_loader.py
encoder.py
estimator.py
mealpy_util.py
transfer.py
validator.py
__init__.py
selector.py
README.md
setup.py
Examples¶
Let’s go through some examples.
First, you need to load your dataset, or you can load own available datasets:
# Load available dataset from MAFESE
from mafese import get_dataset
# Try unknown data
get_dataset("unknown")
# Enter: 1
data = get_dataset("Arrhythmia")
Load your own dataset if you want:
import pandas as pd
from mafese import Data
# load X and y
# NOTE mafese accepts numpy arrays only, hence the .values attribute
dataset = pd.read_csv('examples/dataset.csv', index_col=0).values
X, y = dataset[:, 0:-1], dataset[:, -1]
data = Data(X, y)
Next, split dataset into train and test set:
data.split_train_test(test_size=0.2, inplace=True)
print(data.X_train[:2].shape)
print(data.y_train[:2].shape)
Next, how to use Recursive wrapper-based method:
from mafese.wrapper.recursive import RecursiveSelector
# define mafese feature selection method
feat_selector = RecursiveSelector(problem="classification", estimator="rf", n_features=5)
# find all relevant features - 5 features should be selected
feat_selector.fit(data.X_train, data.y_train)
# check selected features - True (or 1) is selected, False (or 0) is not selected
print(feat_selector.selected_feature_masks)
print(feat_selector.selected_feature_solution)
# check the index of selected features
print(feat_selector.selected_feature_indexes)
# call transform() on X to filter it down to selected features
X_train_selected = feat_selector.transform(data.X_train)
X_test_selected = feat_selector.transform(data.X_test)
Or, how to use Sequential (backward or forward) wrapper-based method:
from mafese.wrapper.sequential import SequentialSelector
# define mafese feature selection method
feat_selector = SequentialSelector(problem="classification", estimator="knn", n_features=3, direction="forward")
# find all relevant features - 5 features should be selected
feat_selector.fit(data.X_train, data.y_train)
# check selected features - True (or 1) is selected, False (or 0) is not selected
print(feat_selector.selected_feature_masks)
print(feat_selector.selected_feature_solution)
# check the index of selected features
print(feat_selector.selected_feature_indexes)
# call transform() on X to filter it down to selected features
X_train_selected = feat_selector.transform(data.X_train)
X_test_selected = feat_selector.transform(data.X_test)
Or, how to use Filter-based feature selection with different correlation methods:
from mafese.filter import FilterSelector
# define mafese feature selection method
feat_selector = FilterSelector(problem='classification', method='SPEARMAN', n_features=5)
# find all relevant features - 5 features should be selected
feat_selector.fit(data.X_train, data.y_train)
# check selected features - True (or 1) is selected, False (or 0) is not selected
print(feat_selector.selected_feature_masks)
print(feat_selector.selected_feature_solution)
# check the index of selected features
print(feat_selector.selected_feature_indexes)
# call transform() on X to filter it down to selected features
X_train_selected = feat_selector.transform(data.X_train)
X_test_selected = feat_selector.transform(data.X_test)
Or, use Metaheuristic-based feature selection with different metaheuristic algorithms:
from mafese.wrapper.mha import MhaSelector
from mafese import get_dataset
from mafese import evaluator
from sklearn.svm import SVC
data = get_dataset("Arrhythmia")
data.split_train_test(test_size=0.2)
print(data.X_train.shape, data.X_test.shape) # (361, 279) (91, 279)
# define mafese feature selection method
feat_selector = MhaSelector(problem="classification", estimator="knn",
optimizer="BaseGA", optimizer_paras=None,
transfer_func="vstf_01", obj_name="AS")
# find all relevant features
feat_selector.fit(data.X_train, data.y_train, fit_weights=(0.9, 0.1), verbose=True)
# check selected features - True (or 1) is selected, False (or 0) is not selected
print(feat_selector.selected_feature_masks)
print(feat_selector.selected_feature_solution)
# check the index of selected features
print(feat_selector.selected_feature_indexes)
# call transform() on X to filter it down to selected features
X_train_selected = feat_selector.transform(data.X_train)
X_test_selected = feat_selector.transform(data.X_test)
# Evaluate final dataset with different estimator with multiple performance metrics
results = feat_selector.evaluate(estimator=SVC(), data=data, metrics=["AS", "PS", "RS"])
print(results)
# {'AS_train': 0.77176, 'PS_train': 0.54177, 'RS_train': 0.6205, 'AS_test': 0.72636, 'PS_test': 0.34628, 'RS_test': 0.52747}
Or, use Lasso-based feature selection with different estimator:
from mafese.embedded.lasso import LassoSelector
from mafese import get_dataset
from mafese import evaluator
from sklearn.svm import SVC
data = get_dataset("Arrhythmia")
data.split_train_test(test_size=0.2)
print(data.X_train.shape, data.X_test.shape) # (361, 279) (91, 279)
# define mafese feature selection method
feat_selector = LassoSelector(problem="classification", estimator="lasso", estimator_paras={"alpha": 0.1})
# find all relevant features
feat_selector.fit(data.X_train, data.y_train)
# check selected features - True (or 1) is selected, False (or 0) is not selected
print(feat_selector.selected_feature_masks)
print(feat_selector.selected_feature_solution)
# check the index of selected features
print(feat_selector.selected_feature_indexes)
# call transform() on X to filter it down to selected features
X_train_selected = feat_selector.transform(data.X_train)
X_test_selected = feat_selector.transform(data.X_test)
# Evaluate final dataset with different estimator with multiple performance metrics
results = feat_selector.evaluate(estimator=SVC(), data=data, metrics=["AS", "PS", "RS"])
print(results)
# {'AS_train': 0.77176, 'PS_train': 0.54177, 'RS_train': 0.6205, 'AS_test': 0.72636, 'PS_test': 0.34628, 'RS_test': 0.52747}
Or, use Tree-based feature selection with different estimator:
from mafese.embedded.tree import TreeSelector
from mafese import get_dataset
from mafese import evaluator
from sklearn.svm import SVC
data = get_dataset("Arrhythmia")
data.split_train_test(test_size=0.2)
print(data.X_train.shape, data.X_test.shape) # (361, 279) (91, 279)
# define mafese feature selection method
feat_selector = TreeSelector(problem="classification", estimator="tree")
# find all relevant features
feat_selector.fit(data.X_train, data.y_train)
# check selected features - True (or 1) is selected, False (or 0) is not selected
print(feat_selector.selected_feature_masks)
print(feat_selector.selected_feature_solution)
# check the index of selected features
print(feat_selector.selected_feature_indexes)
# call transform() on X to filter it down to selected features
X_train_selected = feat_selector.transform(data.X_train)
X_test_selected = feat_selector.transform(data.X_test)
# Evaluate final dataset with different estimator with multiple performance metrics
results = feat_selector.evaluate(estimator=SVC(), data=data, metrics=["AS", "PS", "RS"])
print(results)
# {'AS_train': 0.77176, 'PS_train': 0.54177, 'RS_train': 0.6205, 'AS_test': 0.72636, 'PS_test': 0.34628, 'RS_test': 0.52747}
For more usage examples please look at [examples](/examples) folder.