From b8301ca458653ad47a5a10f0410fa9d75dcb8eac Mon Sep 17 00:00:00 2001
From: junos <junos.lukan@ijs.si>
Date: Sat, 3 Jul 2021 18:45:46 +0200
Subject: [PATCH] Small corrections to docstrings and exploration scripts.

---
 config/models.py                  |  2 +-
 exploration/expl_communication.py |  8 +++++---
 exploration/expl_esm_adherence.py | 23 ++++++++++++++---------
 features/communication.py         |  2 +-
 4 files changed, 21 insertions(+), 14 deletions(-)

diff --git a/config/models.py b/config/models.py
index efa7730..43da431 100644
--- a/config/models.py
+++ b/config/models.py
@@ -212,7 +212,7 @@ class Call(Base, AWAREsensor):
     call_type: int
         One of the Android’s call types (1 – incoming, 2 – outgoing, 3 – missed).
     call_duration: int
-        Length of the call session.
+        Length of the call session in seconds.
     trace: str(40)
         A hash value SHA-1 of the phone number (source or target) of the call
     """
diff --git a/exploration/expl_communication.py b/exploration/expl_communication.py
index d077ed1..e1a0c27 100644
--- a/exploration/expl_communication.py
+++ b/exploration/expl_communication.py
@@ -62,7 +62,7 @@ df_calls_features.describe()
 
 # %%
 calls_number = pd.wide_to_long(
-    df_calls_features.reset_index(),
+    df_calls_features[["no_incoming", "no_outgoing", "no_missed"]].reset_index(),
     i="participant_id",
     j="call_type",
     stubnames="no",
@@ -75,7 +75,9 @@ sns.displot(calls_number, x="no", hue="call_type", binwidth=5, element="step", h
 
 # %%
 calls_duration = pd.wide_to_long(
-    df_calls_features.reset_index(),
+    df_calls_features[
+        ["duration_total_incoming", "duration_total_outgoing"]
+    ].reset_index(),
     i="participant_id",
     j="call_type",
     stubnames="duration",
@@ -114,7 +116,7 @@ df_sms_features.describe()
 
 # %%
 sms_number = pd.wide_to_long(
-    df_sms_features.reset_index(),
+    df_sms_features[["no_received", "no_sent"]].reset_index(),
     i="participant_id",
     j="message_type",
     stubnames="no",
diff --git a/exploration/expl_esm_adherence.py b/exploration/expl_esm_adherence.py
index aa33a66..0d2aaf6 100644
--- a/exploration/expl_esm_adherence.py
+++ b/exploration/expl_esm_adherence.py
@@ -61,9 +61,9 @@ df_esm_preprocessed.columns
 # One approach would be to count distinct session IDs which are incremented for each group of EMAs. However, since not every question answered counts as a fulfilled EMA, some unique session IDs should be eliminated first.
 
 # %%
-session_counts = df_esm_preprocessed.groupby(
-    ["participant_id", "device_id", "esm_session"]
-).count()["id"]
+session_counts = df_esm_preprocessed.groupby(["participant_id", "esm_session"]).count()[
+    "id"
+]
 
 # %% [markdown]
 # Group data by participant_id and esm_session and count the number of instances (by id). Session counts are therefore counts of how many times a specific session ID appears *within* a specific participant.
@@ -145,11 +145,7 @@ df_esm_preprocessed.query("participant_id == 31 & esm_session == 77")[
 df_esm_2 = (
     df_session_counts[df_session_counts["esm_session_count"] == 2]
     .reset_index()
-    .merge(
-        df_esm_preprocessed,
-        how="left",
-        on=["participant_id", "device_id", "esm_session"],
-    )
+    .merge(df_esm_preprocessed, how="left", on=["participant_id", "esm_session"],)
 )
 # with pd.option_context('display.max_rows', None, 'display.max_columns', None):  # more options can be specified also
 # display(df_esm_2)
@@ -318,7 +314,7 @@ df_session_counts.loc[
     df_session_counts.session_response.isna(), "esm_session_count"
 ].value_counts().sort_index()
 
-# %%
+# %% tags=[]
 df_session_7 = df_session_counts[
     (df_session_counts["esm_session_count"] == 7)
     & df_session_counts.session_response.isna()
@@ -369,10 +365,19 @@ df_esm_session_6 = df_session_6.join(
 # %%
 display(df_esm_session_6[["esm_trigger", "esm_instructions", "esm_user_answer"]])
 
+# %% [markdown]
+# The 6-question sessions are long interruptions of work during the daytime.
+
+# %% [markdown]
+# # Count and classify sessions
+
 # %%
 df_session_counts = classify_sessions_by_completion(df_esm_preprocessed)
 df_session_time = classify_sessions_by_time(df_esm_preprocessed)
 
+# %%
+df_session_time
+
 # %% [markdown]
 # The sessions were classified by time by taking the **first** record in a session.
 # However, a morning questionnaire could seamlessly transition into a daytime questionnaire, if the participant was already at work.
diff --git a/features/communication.py b/features/communication.py
index 771e08e..d1dc4ee 100644
--- a/features/communication.py
+++ b/features/communication.py
@@ -180,7 +180,7 @@ def contact_features():
     # TODO Implement a method that takes a DF with enumerated contacts as argument and calculates:
     # * Duration of calls per caller (for most common callers)
     # * Determine work vs non-work contacts by work hours heuristics
-    # * Numer of people contacted
+    # * Number of people contacted
     # And similarly for SMS.
     pass