Document the SensorFeatures class and its __init__ method.

2021-09-13 17:43:47 +02:00 · 2021-09-13 17:43:47 +02:00 · af9e81fe40
parent b19eebbb92
commit af9e81fe40
4 changed files with 62 additions and 8 deletions
--- a/exploration/ex_ml_pipeline.py
+++ b/exploration/ex_ml_pipeline.py
@ -29,13 +29,14 @@ nb_dir = os.path.split(os.getcwd())[0]
 if nb_dir not in sys.path:
    sys.path.append(nb_dir)

-# %%
-import participants.query_db
-from features import esm, helper, proximity
 import machine_learning.features_sensor
 import machine_learning.labels
 import machine_learning.model

+# %%
+import participants.query_db
+from features import esm, helper, proximity
+
 # %% [markdown]
 # # 1. Get the relevant data

@ -169,7 +170,9 @@ with open("../machine_learning/config/minimal_features.yaml", "r") as file:
 print(sensor_features_params)

 # %%
-sensor_features = machine_learning.features_sensor.SensorFeatures(**sensor_features_params)
+sensor_features = machine_learning.features_sensor.SensorFeatures(
+    **sensor_features_params
+)
 sensor_features.data_types

 # %%
--- a/machine_learning/features_sensor.py
+++ b/machine_learning/features_sensor.py
@ -7,7 +7,7 @@ import pandas as pd
 from pyprojroot import here

 import participants.query_db
-from features import proximity, helper, communication
+from features import communication, helper, proximity

 WARNING_PARTICIPANTS_LABEL = (
    "Before calculating features, please set participants label using self.set_participants_label() "
@ -17,13 +17,64 @@ WARNING_PARTICIPANTS_LABEL = (


 class SensorFeatures:
+    """
+    A class to represent all sensor (AWARE) features.
+
+    Attributes
+    ----------
+    grouping_variable: str
+        The name of the variable by which to group (segment) data, e.g. date_lj.
+    features: dict
+        A dictionary of sensors (data types) and features to calculate.
+        See config/minimal_features.yaml for an example.
+    participants_usernames: Collection
+        A list of usernames for which to calculate features.
+        If None, use all participants.
+
+    Methods
+    -------
+    set_sensor_data():
+        Query the database for data types defined by self.features.
+    get_sensor_data(data_type): pd.DataFrame
+        Returns the dataframe of sensor data for specified data_type.
+    calculate_features():
+        Calls appropriate functions from features/ and joins them in a single dataframe, df_features_all.
+    get_features(data_type, feature_names): pd.DataFrame
+        Returns the dataframe of specified features for selected sensor.
+
+    construct_export_path():
+        Construct a path for exporting the features as csv files.
+    set_participants_label(label):
+        Sets a label for the usernames subset. This is used to distinguish feature exports.
+    """
+
    def __init__(
        self,
        grouping_variable: str,
        features: dict,
        participants_usernames: Collection = None,
    ):
+        """
+        Specifies the grouping variable and usernames for which to calculate features.
+        Sets other (implicit) attributes used in other methods.
+        If participants_usernames=None, this queries the usernames which belong to the main part of the study,
+            i.e. from 2020-08-01 on.

+        Parameters
+        ----------
+        grouping_variable: str
+            The name of the variable by which to group (segment) data, e.g. date_lj.
+        features: dict
+            A dictionary of sensors (data types) and features to calculate.
+            See config/minimal_features.yaml for an example.
+        participants_usernames: Collection
+            A list of usernames for which to calculate features.
+            If None, use all participants.
+
+        Returns
+        -------
+        None
+        """
        self.grouping_variable_name = grouping_variable
        self.grouping_variable = [grouping_variable]

@ -170,4 +221,4 @@ def to_csv_with_settings(
        index=False,
        encoding="utf-8",
    )
-    print("Exported the dataframe to " + str(full_path))
+    print("Exported the dataframe to " + str(full_path))
--- a/machine_learning/labels.py
+++ b/machine_learning/labels.py
@ -83,4 +83,4 @@ class Labels:
        print("Labels aggregated.")

    def get_aggregated_labels(self):
-        return self.df_esm_means
+        return self.df_esm_means
--- a/machine_learning/model.py
+++ b/machine_learning/model.py
@ -44,4 +44,4 @@ class ModelValidation:
            cv=self.cv,
            n_jobs=-1,
            scoring="r2",
-        )
+        )