From b8301ca458653ad47a5a10f0410fa9d75dcb8eac Mon Sep 17 00:00:00 2001 From: junos Date: Sat, 3 Jul 2021 18:45:46 +0200 Subject: [PATCH] Small corrections. --- config/models.py | 2 +- exploration/expl_communication.py | 8 +++++--- exploration/expl_esm_adherence.py | 23 ++++++++++++++--------- features/communication.py | 2 +- 4 files changed, 21 insertions(+), 14 deletions(-) diff --git a/config/models.py b/config/models.py index efa7730..43da431 100644 --- a/config/models.py +++ b/config/models.py @@ -212,7 +212,7 @@ class Call(Base, AWAREsensor): call_type: int One of the Android’s call types (1 – incoming, 2 – outgoing, 3 – missed). call_duration: int - Length of the call session. + Length of the call session in seconds. trace: str(40) A hash value SHA-1 of the phone number (source or target) of the call """ diff --git a/exploration/expl_communication.py b/exploration/expl_communication.py index d077ed1..e1a0c27 100644 --- a/exploration/expl_communication.py +++ b/exploration/expl_communication.py @@ -62,7 +62,7 @@ df_calls_features.describe() # %% calls_number = pd.wide_to_long( - df_calls_features.reset_index(), + df_calls_features[["no_incoming", "no_outgoing", "no_missed"]].reset_index(), i="participant_id", j="call_type", stubnames="no", @@ -75,7 +75,9 @@ sns.displot(calls_number, x="no", hue="call_type", binwidth=5, element="step", h # %% calls_duration = pd.wide_to_long( - df_calls_features.reset_index(), + df_calls_features[ + ["duration_total_incoming", "duration_total_outgoing"] + ].reset_index(), i="participant_id", j="call_type", stubnames="duration", @@ -114,7 +116,7 @@ df_sms_features.describe() # %% sms_number = pd.wide_to_long( - df_sms_features.reset_index(), + df_sms_features[["no_received", "no_sent"]].reset_index(), i="participant_id", j="message_type", stubnames="no", diff --git a/exploration/expl_esm_adherence.py b/exploration/expl_esm_adherence.py index aa33a66..0d2aaf6 100644 --- a/exploration/expl_esm_adherence.py +++ b/exploration/expl_esm_adherence.py @@ -61,9 +61,9 @@ df_esm_preprocessed.columns # One approach would be to count distinct session IDs which are incremented for each group of EMAs. However, since not every question answered counts as a fulfilled EMA, some unique session IDs should be eliminated first. # %% -session_counts = df_esm_preprocessed.groupby( - ["participant_id", "device_id", "esm_session"] -).count()["id"] +session_counts = df_esm_preprocessed.groupby(["participant_id", "esm_session"]).count()[ + "id" +] # %% [markdown] # Group data by participant_id and esm_session and count the number of instances (by id). Session counts are therefore counts of how many times a specific session ID appears *within* a specific participant. @@ -145,11 +145,7 @@ df_esm_preprocessed.query("participant_id == 31 & esm_session == 77")[ df_esm_2 = ( df_session_counts[df_session_counts["esm_session_count"] == 2] .reset_index() - .merge( - df_esm_preprocessed, - how="left", - on=["participant_id", "device_id", "esm_session"], - ) + .merge(df_esm_preprocessed, how="left", on=["participant_id", "esm_session"],) ) # with pd.option_context('display.max_rows', None, 'display.max_columns', None): # more options can be specified also # display(df_esm_2) @@ -318,7 +314,7 @@ df_session_counts.loc[ df_session_counts.session_response.isna(), "esm_session_count" ].value_counts().sort_index() -# %% +# %% tags=[] df_session_7 = df_session_counts[ (df_session_counts["esm_session_count"] == 7) & df_session_counts.session_response.isna() @@ -369,10 +365,19 @@ df_esm_session_6 = df_session_6.join( # %% display(df_esm_session_6[["esm_trigger", "esm_instructions", "esm_user_answer"]]) +# %% [markdown] +# The 6-question sessions are long interruptions of work during daytime. + +# %% [markdown] +# # Count and classify sessions + # %% df_session_counts = classify_sessions_by_completion(df_esm_preprocessed) df_session_time = classify_sessions_by_time(df_esm_preprocessed) +# %% +df_session_time + # %% [markdown] # The sessions were classified by time by taking the **first** record in a session. # However, a morning questionnaire could seamlessly transition into a daytime questionnaire, if the participant was already at work. diff --git a/features/communication.py b/features/communication.py index 771e08e..d1dc4ee 100644 --- a/features/communication.py +++ b/features/communication.py @@ -180,7 +180,7 @@ def contact_features(): # TODO Implement a method that takes a DF with enumerated contacts as argument and calculates: # * Duration of calls per caller (for most common callers) # * Determine work vs non-work contacts by work hours heuristics - # * Numer of people contacted + # * Number of people contacted # And similarly for SMS. pass