Add a function to correct the morning questionnaires into daytime.

2021-06-11 16:44:33 +02:00 · 2021-06-11 16:44:33 +02:00 · 294d4a2b49
parent a3417c182a
commit 294d4a2b49
1 changed files with 44 additions and 0 deletions
--- a/features/esm.py
+++ b/features/esm.py
@@ -17,6 +17,12 @@ SESSION_STATUS_UNANSWERED = "ema_unanswered"
 SESSION_STATUS_DAY_FINISHED = "day_finished"
 SESSION_STATUS_COMPLETE = "ema_completed"

+MAX_MORNING_LENGTH = 3
+# When the participant was not yet at work at the time of the first (morning) EMA,
+# only three items were answered:
+# two sleep-related items and one indicating that work had NOT started yet.
+# Daytime EMAs are all longer, in fact they always consist of at least 6 items.
+

 def get_esm_data(usernames: Collection) -> pd.DataFrame:
    """
@@ -165,3 +171,41 @@ def classify_sessions_by_time(df_esm_preprocessed: pd.DataFrame) -> pd.DataFrame
        .first()[["time", "datetime_lj"]]
    )
    return df_session_time
+
+
+def classify_sessions_by_completion_time(
+    df_esm_preprocessed: pd.DataFrame,
+) -> pd.DataFrame:
+    """
+    This function not only classifies sessions using the previously defined functions;
+    it also "corrects" the time type of some EMA sessions.
+
+    A morning questionnaire could seamlessly transition into a daytime questionnaire,
+        if the participant was already at work.
+    In this case, the "time" label changed mid-session.
+    Because of the way classify_sessions_by_time works, this questionnaire was classified as "morning".
+    But for all intents and purposes, it can be treated as a "daytime" EMA.
+
+    This scenario is distinguished from a true "morning" questionnaire,
+        where the participant is NOT yet at work, by the session's length (see MAX_MORNING_LENGTH).
+
+    Parameters
+    ----------
+    df_esm_preprocessed: pd.DataFrame
+        A preprocessed dataframe of esm data, which must include the session ID (esm_session).
+
+    Returns
+    -------
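+    df_session_counts_time: pd.DataFrame
+        Session times joined with session counts; "time" is "daytime" where the count exceeds MAX_MORNING_LENGTH.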
+    """
+    df_session_counts = classify_sessions_by_completion(df_esm_preprocessed)
+    df_session_time = classify_sessions_by_time(df_esm_preprocessed)
+
+    df_session_counts_time = df_session_time.join(df_session_counts)
+
+    df_session_counts_time.loc[
+        df_session_counts_time.esm_session_count > MAX_MORNING_LENGTH, "time"
+    ] = "daytime"
+
+    return df_session_counts_time