From af9e81fe408da149911a974d7fea5c0c3fe10c70 Mon Sep 17 00:00:00 2001
From: junos <junos.lukan@ijs.si>
Date: Mon, 13 Sep 2021 17:43:47 +0200
Subject: [PATCH] Document the SensorFeatures class and its __init__ method.

---
 exploration/ex_ml_pipeline.py       | 11 +++---
 machine_learning/features_sensor.py | 55 +++++++++++++++++++++++++++--
 machine_learning/labels.py          |  2 +-
 machine_learning/model.py           |  2 +-
 4 files changed, 62 insertions(+), 8 deletions(-)

diff --git a/exploration/ex_ml_pipeline.py b/exploration/ex_ml_pipeline.py
index b33d20b..178b62f 100644
--- a/exploration/ex_ml_pipeline.py
+++ b/exploration/ex_ml_pipeline.py
@@ -29,13 +29,14 @@ nb_dir = os.path.split(os.getcwd())[0]
 if nb_dir not in sys.path:
     sys.path.append(nb_dir)
 
-# %%
-import participants.query_db
-from features import esm, helper, proximity
 import machine_learning.features_sensor
 import machine_learning.labels
 import machine_learning.model
 
+# %%
+import participants.query_db
+from features import esm, helper, proximity
+
 # %% [markdown]
 # # 1. Get the relevant data
 
@@ -169,7 +170,9 @@ with open("../machine_learning/config/minimal_features.yaml", "r") as file:
 print(sensor_features_params)
 
 # %%
-sensor_features = machine_learning.features_sensor.SensorFeatures(**sensor_features_params)
+sensor_features = machine_learning.features_sensor.SensorFeatures(
+    **sensor_features_params
+)
 sensor_features.data_types
 
 # %%
diff --git a/machine_learning/features_sensor.py b/machine_learning/features_sensor.py
index 2567004..5b28741 100644
--- a/machine_learning/features_sensor.py
+++ b/machine_learning/features_sensor.py
@@ -7,7 +7,7 @@ import pandas as pd
 from pyprojroot import here
 
 import participants.query_db
-from features import proximity, helper, communication
+from features import communication, helper, proximity
 
 WARNING_PARTICIPANTS_LABEL = (
     "Before calculating features, please set participants label using self.set_participants_label() "
@@ -17,13 +17,64 @@ WARNING_PARTICIPANTS_LABEL = (
 
 
 class SensorFeatures:
+    """
+    A class to represent all sensor (AWARE) features.
+
+    Attributes
+    ----------
+    grouping_variable: str
+        The name of the variable by which to group (segment) data, e.g. date_lj.
+    features: dict
+        A dictionary of sensors (data types) and features to calculate.
+        See config/minimal_features.yaml for an example.
+    participants_usernames: Collection
+        A collection of usernames for which to calculate features.
+        If None, use all participants from the main part of the study (from 2020-08-01 on).
+
+    Methods
+    -------
+    set_sensor_data():
+        Query the database for data types defined by self.features.
+    get_sensor_data(data_type): pd.DataFrame
+        Returns the dataframe of sensor data for the specified data_type.
+    calculate_features():
+        Calls appropriate functions from features/ and joins their results into a single dataframe, df_features_all.
+    get_features(data_type, feature_names): pd.DataFrame
+        Returns the dataframe of the specified features for the selected sensor.
+
+    construct_export_path():
+        Construct a path for exporting the features as csv files.
+    set_participants_label(label):
+        Sets a label for the usernames subset. This is used to distinguish feature exports.
+    """
+
     def __init__(
         self,
         grouping_variable: str,
         features: dict,
         participants_usernames: Collection = None,
     ):
+        """
+        Specifies the grouping variable, the features to calculate, and the usernames to calculate them for.
+        Sets other (implicit) attributes used in other methods.
+        If participants_usernames=None, this queries the usernames which belong to the main part of the study,
+            i.e. from 2020-08-01 on.
 
+        Parameters
+        ----------
+        grouping_variable: str
+            The name of the variable by which to group (segment) data, e.g. date_lj.
+        features: dict
+            A dictionary of sensors (data types) and features to calculate.
+            See config/minimal_features.yaml for an example.
+        participants_usernames: Collection
+            A collection of usernames for which to calculate features.
+            If None, use all participants from the main part of the study (from 2020-08-01 on).
+
+        Returns
+        -------
+        None
+        """
         self.grouping_variable_name = grouping_variable
         self.grouping_variable = [grouping_variable]
 
@@ -170,4 +221,4 @@ def to_csv_with_settings(
         index=False,
         encoding="utf-8",
     )
-    print("Exported the dataframe to " + str(full_path))
\ No newline at end of file
+    print("Exported the dataframe to " + str(full_path))
diff --git a/machine_learning/labels.py b/machine_learning/labels.py
index 76a7ab9..9c4b968 100644
--- a/machine_learning/labels.py
+++ b/machine_learning/labels.py
@@ -83,4 +83,4 @@ class Labels:
         print("Labels aggregated.")
 
     def get_aggregated_labels(self):
-        return self.df_esm_means
\ No newline at end of file
+        return self.df_esm_means
diff --git a/machine_learning/model.py b/machine_learning/model.py
index ef115d3..da38c79 100644
--- a/machine_learning/model.py
+++ b/machine_learning/model.py
@@ -44,4 +44,4 @@ class ModelValidation:
             cv=self.cv,
             n_jobs=-1,
             scoring="r2",
-        )
\ No newline at end of file
+        )