From af9e81fe408da149911a974d7fea5c0c3fe10c70 Mon Sep 17 00:00:00 2001 From: junos Date: Mon, 13 Sep 2021 17:43:47 +0200 Subject: [PATCH] Document the SensorFeatures class and its __init__ method. --- exploration/ex_ml_pipeline.py | 11 +++--- machine_learning/features_sensor.py | 55 +++++++++++++++++++++++++++-- machine_learning/labels.py | 2 +- machine_learning/model.py | 2 +- 4 files changed, 62 insertions(+), 8 deletions(-) diff --git a/exploration/ex_ml_pipeline.py b/exploration/ex_ml_pipeline.py index b33d20b..178b62f 100644 --- a/exploration/ex_ml_pipeline.py +++ b/exploration/ex_ml_pipeline.py @@ -29,13 +29,14 @@ nb_dir = os.path.split(os.getcwd())[0] if nb_dir not in sys.path: sys.path.append(nb_dir) -# %% -import participants.query_db -from features import esm, helper, proximity import machine_learning.features_sensor import machine_learning.labels import machine_learning.model +# %% +import participants.query_db +from features import esm, helper, proximity + # %% [markdown] # # 1. Get the relevant data @@ -169,7 +170,9 @@ with open("../machine_learning/config/minimal_features.yaml", "r") as file: print(sensor_features_params) # %% -sensor_features = machine_learning.features_sensor.SensorFeatures(**sensor_features_params) +sensor_features = machine_learning.features_sensor.SensorFeatures( + **sensor_features_params +) sensor_features.data_types # %% diff --git a/machine_learning/features_sensor.py b/machine_learning/features_sensor.py index 2567004..5b28741 100644 --- a/machine_learning/features_sensor.py +++ b/machine_learning/features_sensor.py @@ -7,7 +7,7 @@ import pandas as pd from pyprojroot import here import participants.query_db -from features import proximity, helper, communication +from features import communication, helper, proximity WARNING_PARTICIPANTS_LABEL = ( "Before calculating features, please set participants label using self.set_participants_label() " @@ -17,13 +17,64 @@ WARNING_PARTICIPANTS_LABEL = ( class SensorFeatures: + """ + A class to represent all sensor (AWARE) features. + + Attributes + ---------- + grouping_variable: str + The name of the variable by which to group (segment) data, e.g. date_lj. + features: dict + A dictionary of sensors (data types) and features to calculate. + See config/minimal_features.yaml for an example. + participants_usernames: Collection + A list of usernames for which to calculate features. + If None, use all participants. + + Methods + ------- + set_sensor_data(): + Query the database for data types defined by self.features. + get_sensor_data(data_type): pd.DataFrame + Returns the dataframe of sensor data for specified data_type. + calculate_features(): + Calls appropriate functions from features/ and joins them in a single dataframe, df_features_all. + get_features(data_type, feature_names): pd.DataFrame + Returns the dataframe of specified features for selected sensor. + + construct_export_path(): + Construct a path for exporting the features as csv files. + set_participants_label(label): + Sets a label for the usernames subset. This is used to distinguish feature exports. + """ + def __init__( self, grouping_variable: str, features: dict, participants_usernames: Collection = None, ): + """ + Specifies the grouping variable and usernames for which to calculate features. + Sets other (implicit) attributes used in other methods. + If participants_usernames=None, this queries the usernames which belong to the main part of the study, + i.e. from 2020-08-01 on. + Parameters + ---------- + grouping_variable: str + The name of the variable by which to group (segment) data, e.g. date_lj. + features: dict + A dictionary of sensors (data types) and features to calculate. + See config/minimal_features.yaml for an example. + participants_usernames: Collection + A list of usernames for which to calculate features. + If None, use all participants. + + Returns + ------- + None + """ self.grouping_variable_name = grouping_variable self.grouping_variable = [grouping_variable] @@ -170,4 +221,4 @@ def to_csv_with_settings( index=False, encoding="utf-8", ) - print("Exported the dataframe to " + str(full_path)) \ No newline at end of file + print("Exported the dataframe to " + str(full_path)) diff --git a/machine_learning/labels.py b/machine_learning/labels.py index 76a7ab9..9c4b968 100644 --- a/machine_learning/labels.py +++ b/machine_learning/labels.py @@ -83,4 +83,4 @@ class Labels: print("Labels aggregated.") def get_aggregated_labels(self): - return self.df_esm_means \ No newline at end of file + return self.df_esm_means diff --git a/machine_learning/model.py b/machine_learning/model.py index ef115d3..da38c79 100644 --- a/machine_learning/model.py +++ b/machine_learning/model.py @@ -44,4 +44,4 @@ class ModelValidation: cv=self.cv, n_jobs=-1, scoring="r2", - ) \ No newline at end of file + )