"""Feature selection pipeline.

Methods that can be used in the wrapper method alongside other wrapper
contents (hyperparameter tuning etc.).

(1) Establish methods for each of the steps in the feature selection protocol:
    (a) feature selection inside specific sensors (sklearn method):
        returns the most important features from all sensors
    (b) feature selection between "tuned" sensors:
        returns filtered sensors, containing the most important features
        returned with (a)
(2) Ensure that the above methods are given only a part of the data and use
    appropriate random seeds - to later simulate the use case in production.
(3) Implement a method which gives a graphical exploration of the (1) (a) and
    (b) steps of the feature selection.
(4) Prepare a core method that can be fit into a wrapper (see sklearn wrapper
    methods) and integrates the methods from (1).
"""

import os
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.naive_bayes import GaussianNB


class FeatureSelection:
    """Skeleton of the feature selection pipeline described in the module docstring.

    Only the constructor and ``within_sensors_feature_selection`` contain any
    logic so far; the remaining steps are placeholders that return ``None``.
    """

    def __init__(self, X_train, X_test, y_train, y_test):
        """Keep references to the train/test split for later selection steps.

        Args:
            X_train: training features.
            X_test: held-out features.
            y_train: training targets.
            y_test: held-out targets.
        """
        # TODO: what about leave-one-subject-out CV?
        self.X_train = X_train
        self.X_test = X_test
        self.y_train = y_train
        self.y_test = y_test

    # The step methods below take no instance argument, so they are declared
    # static: this keeps their parameter lists as-is while making them callable
    # from both the class and an instance.
    # NOTE(review): they presumably should become instance methods once they
    # consume the stored split - confirm before wiring in the data.

    @staticmethod
    def within_sensors_feature_selection(estimator, scoring, tol):
        """Configure sequential feature selection inside individual sensors.

        The selector is only constructed here; it is not fitted yet, so the
        returned list is currently always empty.

        Args:
            estimator: estimator handed to ``SequentialFeatureSelector``;
                a fresh ``GaussianNB()`` is used when ``None``.
            scoring: forwarded as the selector's ``scoring`` argument.
            tol: forwarded as the selector's ``tol`` argument; ``0.02`` is
                used when ``None``.

        Returns:
            list: the selected features (empty until fitting is implemented).
        """
        selected_features = []

        if estimator is None:
            estimator = GaussianNB()
        if tol is None:
            tol = 0.02

        # Can set n_features_to_select to an absolute value -> then remove
        # the tol parameter.
        selector = SequentialFeatureSelector(
            estimator,
            n_features_to_select='auto',
            tol=tol,
            scoring=scoring,
        )
        # TODO: fit `selector` on the training data and fill `selected_features`.

        return selected_features

    @staticmethod
    def between_sensors_feature_selection():
        """Placeholder for step (1)(b), selection between sensors; returns None."""
        pass

    @staticmethod
    def vizualize_feature_selection_process():
        """Placeholder for step (3), graphical exploration; returns None."""
        pass

    @staticmethod
    def execute_feature_selection_step():
        """Placeholder for step (4), the wrapper entry point; returns None."""
        pass