Further refactor by moving helper functions.

2021-09-15 15:14:54 +02:00 · 2021-09-15 15:14:54 +02:00 · 20748890a8
parent 28699a0fdf
commit 20748890a8
3 changed files with 69 additions and 59 deletions
--- a/.gitignore
+++ b/.gitignore
@ -5,3 +5,4 @@ __pycache__/
 /exploration/*.ipynb
 /config/*.ipynb
 /statistical_analysis/*.ipynb
 /machine_learning/intermediate_results/
--- a/machine_learning/features_sensor.py
+++ b/machine_learning/features_sensor.py
@ -8,6 +8,11 @@ from pyprojroot import here
 import participants.query_db
 from features import communication, helper, proximity
 from machine_learning.helper import (
    read_csv_with_settings,
    safe_outer_merge_on_index,
    to_csv_with_settings,
 )
 WARNING_PARTICIPANTS_LABEL = (
    "Before calculating features, please set participants label using self.set_participants_label() "
@ -53,7 +58,7 @@ class SensorFeatures:
        grouping_variable: str,
        features: dict,
        participants_usernames: Collection = None,
-    ):
+    ) -> None:
        """
        Specifies the grouping variable and usernames for which to calculate features.
        Sets other (implicit) attributes used in other methods.
@ -97,12 +102,12 @@ class SensorFeatures:
        self.df_sms = pd.DataFrame()
        self.df_calls_sms = pd.DataFrame()
-        self.folder = None
+        self.folder: Path = Path()
        self.filename_prefix = ""
        self.construct_export_path()
        print("SensorFeatures initialized.")
-    def set_sensor_data(self):
+    def set_sensor_data(self) -> None:
        print("Querying database ...")
        if "proximity" in self.data_types:
            self.df_proximity = proximity.get_proximity_data(
@ -128,7 +133,7 @@ class SensorFeatures:
        else:
            raise KeyError("This data type has not been implemented.")
-    def calculate_features(self, cached=True):
+    def calculate_features(self, cached=True) -> None:
        print("Calculating features ...")
        if not self.participants_label:
            raise ValueError(WARNING_PARTICIPANTS_LABEL)
@ -213,7 +218,7 @@ class SensorFeatures:
        else:
            raise KeyError("This data type has not been implemented.")
-    def construct_export_path(self):
+    def construct_export_path(self) -> None:
        if not self.participants_label:
            warnings.warn(WARNING_PARTICIPANTS_LABEL, UserWarning)
        self.folder = here("machine_learning/intermediate_results/features", warn=True)
@ -221,59 +226,6 @@ class SensorFeatures:
            self.participants_label + "_" + self.grouping_variable_name
        )
-    def set_participants_label(self, label: str):
+    def set_participants_label(self, label: str) -> None:
        self.participants_label = label
        self.construct_export_path()
 def safe_outer_merge_on_index(left, right):
    """
    Outer-join two dataframes on their indices, tolerating empty inputs.

    When left is empty, right is handed back untouched; otherwise, when
    right is empty, left is handed back untouched. Only if both hold data
    are they merged, and pandas is asked to validate that the index
    relationship is one-to-one.
    """
    # Guard clauses: an empty side means there is nothing to join.
    if left.empty:
        return right
    if right.empty:
        return left

    merge_options = dict(
        how="outer",
        left_index=True,
        right_index=True,
        validate="one_to_one",
    )
    return pd.merge(left, right, **merge_options)
 def to_csv_with_settings(
    df: pd.DataFrame, folder: Path, filename_prefix: str, data_type: str
 ) -> None:
    """
    Write df to a CSV file using the fixed export settings.

    The target is built by construct_full_path from folder, filename_prefix
    and data_type. The file is comma-separated and UTF-8 encoded, includes
    the header row and the index, and writes missing values as "NA".
    The destination is printed once the export is done.
    """
    destination = construct_full_path(folder, filename_prefix, data_type)
    export_settings = {
        "sep": ",",
        "na_rep": "NA",
        "header": True,
        "index": True,
        "encoding": "utf-8",
    }
    df.to_csv(path_or_buf=destination, **export_settings)
    print(f"Exported the dataframe to {destination}")
 def read_csv_with_settings(
    folder: Path, filename_prefix: str, data_type: str, grouping_variable: list
 ) -> pd.DataFrame:
    """
    Load a CSV file using the fixed import settings.

    The source file is located with construct_full_path. The first row is
    used as the header, "NA" is read as a missing value, and the index is
    built from "participant_id" followed by the columns listed in
    grouping_variable. pandas is asked to parse dates (parse_dates=True)
    and to infer their format.
    """
    source = construct_full_path(folder, filename_prefix, data_type)
    # participant_id always leads the index; grouping columns follow.
    index_columns = ["participant_id"] + grouping_variable
    import_settings = {
        "sep": ",",
        "header": 0,
        "na_values": "NA",
        "encoding": "utf-8",
        "index_col": index_columns,
        "parse_dates": True,
        "infer_datetime_format": True,
    }
    return pd.read_csv(filepath_or_buffer=source, **import_settings)
 def construct_full_path(folder: Path, filename_prefix: str, data_type: str) -> Path:
    """Return the path of "<filename_prefix>_<data_type>.csv" inside folder."""
    return folder / (filename_prefix + "_" + data_type + ".csv")
--- a/machine_learning/helper.py
+++ b/machine_learning/helper.py
@ -0,0 +1,57 @@
 from pathlib import Path
 import pandas as pd
 def safe_outer_merge_on_index(left: pd.DataFrame, right: pd.DataFrame) -> pd.DataFrame:
    """
    Combine two dataframes with an outer join on their indices.

    An empty dataframe is never merged: if left is empty the result is
    right itself, and if only right is empty the result is left itself.
    When both contain data, the merge is validated as one-to-one on the
    index.
    """
    if left.empty or right.empty:
        # left is checked first, so two empty inputs yield right.
        return right if left.empty else left

    return pd.merge(
        left, right, how="outer", left_index=True, right_index=True, validate="one_to_one"
    )
 def to_csv_with_settings(
    df: pd.DataFrame, folder: Path, filename_prefix: str, data_type: str
 ) -> None:
    """
    Export df to "<filename_prefix>_<data_type>.csv" inside folder.

    The file is comma-separated, UTF-8 encoded, contains the header and the
    index, and represents missing values as "NA". The target directory is
    created first if it does not exist yet, and the final path is printed
    after the export.

    Parameters
    ----------
    df: the dataframe to export.
    folder: the directory to write into.
    filename_prefix: the first part of the file name.
    data_type: the second part of the file name (before ".csv").
    """
    full_path = construct_full_path(folder, filename_prefix, data_type)
    # The export directory may be missing (e.g. it is not tracked by git) and
    # DataFrame.to_csv refuses to write into a non-existent directory.
    full_path.parent.mkdir(parents=True, exist_ok=True)
    df.to_csv(
        path_or_buf=full_path,
        sep=",",
        na_rep="NA",
        header=True,
        index=True,
        encoding="utf-8",
    )
    print("Exported the dataframe to " + str(full_path))
 def read_csv_with_settings(
    folder: Path, filename_prefix: str, data_type: str, grouping_variable: list
 ) -> pd.DataFrame:
    """
    Read "<filename_prefix>_<data_type>.csv" from folder into a dataframe.

    The file is read as comma-separated UTF-8 text with the header in the
    first row and "NA" treated as missing. The index consists of
    "participant_id" plus the columns named in grouping_variable. Date
    parsing is enabled with format inference and date caching.
    """
    csv_location = construct_full_path(folder, filename_prefix, data_type)
    # The index is participant_id followed by the grouping columns.
    index_columns = ["participant_id"] + grouping_variable
    dataframe = pd.read_csv(
        csv_location,
        sep=",",
        header=0,
        na_values="NA",
        encoding="utf-8",
        index_col=index_columns,
        parse_dates=True,
        infer_datetime_format=True,
        cache_dates=True,
    )
    return dataframe
 def construct_full_path(folder: Path, filename_prefix: str, data_type: str) -> Path:
    """
    Build the location of a CSV file inside folder.

    The file name is filename_prefix and data_type joined by an underscore,
    with a ".csv" extension.
    """
    csv_name = "".join([filename_prefix, "_", data_type, ".csv"])
    return folder / csv_name