45 lines
1.6 KiB
Python
45 lines
1.6 KiB
Python
import os
|
|
import sys
|
|
|
|
import numpy as np
|
|
import matplotlib.pyplot as plt
|
|
import pandas as pd
|
|
|
|
from sklearn.feature_selection import SequentialFeatureSelector
|
|
from sklearn.naive_bayes import GaussianNB
|
|
|
|
|
|
""" Feature selection pipeline: methods that can be used in the wrapper method alongside other wrapper contents (hyperparameter tuning etc.).
|
|
|
|
(1) Establish methods for each of the steps in feature selection protocol:
|
|
(a) feature selection inside specific sensors (sklearn method): returns most important features from all sensors
|
|
(b) feature selection between "tuned" sensors: returns filtered sensors, containing most important features returned with (a)
|
|
(2) Ensure that above methods are given only a part of data and use appropriate random seeds - to later simulate use case in production.
|
|
(3) Implement a method which gives graphical exploration of (1) (a) and (b) steps of the feature selection.
|
|
(4) Prepare a core method that can be fit into a wrapper (see sklearn wrapper methods) and integrates methods from (1)
|
|
|
|
"""
|
|
|
|
class FeatureSelection:
    """Skeleton of the feature selection pipeline.

    Only the constructor and ``within_sensors_feature_selection`` contain
    any logic so far; the remaining steps are placeholders. The step methods
    are static because none of them reads instance state yet, which keeps
    them callable both on the class and on an instance.
    """

    def __init__(self, X_train, X_test, y_train, y_test):  # TODO: what about leave-one-subject-out CV?
        """Keep the train/test split on the instance.

        Args:
            X_train: Training inputs.
            X_test: Test inputs.
            y_train: Training targets.
            y_test: Test targets.
        """
        self.X_train = X_train
        self.X_test = X_test
        self.y_train = y_train
        self.y_test = y_test

    @staticmethod
    def within_sensors_feature_selection(estimator, scoring, tol) -> list:
        """Set up sequential feature selection inside individual sensors.

        Args:
            estimator: Estimator handed to ``SequentialFeatureSelector``;
                a ``GaussianNB`` is used when ``None`` is given.
            scoring: Forwarded as the selector's ``scoring`` argument.
            tol: Forwarded as the selector's ``tol`` argument.

        Returns:
            The list of selected features. It is always empty for now,
            because the selector is configured but not fitted yet.
        """
        features_list = []

        if estimator is None:
            estimator = GaussianNB()

        # Can set n_features_to_select to an absolute value -> then remove
        # the tol parameter.
        sfs = SequentialFeatureSelector(
            estimator,
            n_features_to_select='auto',
            tol=tol,
            scoring=scoring,
        )
        # TODO: fit `sfs` on the training data and fill `features_list`.

        return features_list

    @staticmethod
    def between_sensors_feature_selection() -> None:
        """Placeholder for feature selection between sensors."""
        pass

    @staticmethod
    def vizualize_feature_selection_process() -> None:
        """Placeholder for the graphical exploration of the selection steps."""
        pass

    @staticmethod
    def execute_feature_selection_step() -> None:
        """Placeholder for the core step meant to plug into a wrapper."""
        pass