Start exploring PANAS data.

Add a function to clean up ESM data.
communication
junos 2021-07-02 16:33:48 +02:00
parent c29c7b19f2
commit 74392f229a
3 changed files with 107 additions and 8 deletions

View File

@ -0,0 +1,53 @@
# ---
# jupyter:
# jupytext:
# formats: ipynb,py:percent
# text_representation:
# extension: .py
# format_name: percent
# format_version: '1.3'
# jupytext_version: 1.11.2
# kernelspec:
# display_name: straw2analysis
# language: python
# name: straw2analysis
# ---
# %%
# `datetime` is imported explicitly: it is used below and must not depend on
# whatever names happen to leak through the wildcard import of features.esm.
import datetime
import os
import sys

import seaborn as sns

# Put the parent of the current working directory on sys.path so that the
# project packages imported below can be resolved.
nb_dir = os.path.split(os.getcwd())[0]
if nb_dir not in sys.path:
    sys.path.append(nb_dir)

import participants.query_db

# NOTE(review): the wildcard import presumably supplies get_esm_data,
# preprocess_esm and clean_up_esm used below - confirm before narrowing it.
from features.esm import *

# %% [markdown]
# # ESM data

# %% [markdown]
# Only take data from the main part of the study. The pilot data have a different structure; in particular, there were many additions to ESM_JSON.

# %%
participants_inactive_usernames = participants.query_db.get_usernames(
    collection_start=datetime.date.fromisoformat("2020-08-01")
)
df_esm_inactive = get_esm_data(participants_inactive_usernames)

# %%
df_esm_preprocessed = preprocess_esm(df_esm_inactive)
df_esm_clean = clean_up_esm(df_esm_preprocessed)

# %%
# Keep only rows for questionnaires 8 and 9 (presumably the PANAS
# questionnaires, going by the variable name - confirm).
df_esm_PANAS = df_esm_clean[df_esm_clean["questionnaire_id"].isin([8, 9])]
df_esm_PANAS_grouped = df_esm_PANAS.groupby(["participant_id", "questionnaire_id"])

# %%
df_esm_PANAS.head()
# %%

View File

@ -213,3 +213,25 @@ def classify_sessions_by_completion_time(
df_session_counts_time.loc[morning_transition_to_daytime, "time"] = "daytime" df_session_counts_time.loc[morning_transition_to_daytime, "time"] = "daytime"
return df_session_counts_time return df_session_counts_time
def clean_up_esm(df_esm_preprocessed: pd.DataFrame) -> pd.DataFrame:
    """
    Drop invalid ESM responses from a preprocessed dataframe.

    Only rows whose "esm_status" equals ESM_STATUS_ANSWERED are kept,
    i.e. unanswered ESMs are removed.

    Parameters
    ----------
    df_esm_preprocessed: pd.DataFrame
        A preprocessed dataframe of esm data. It is indexed by its
        "esm_status" column, so that column must be present.

    Returns
    -------
    df_esm_clean: pd.DataFrame
        The rows of the input dataframe that have the answered status.
    """
    is_answered = df_esm_preprocessed["esm_status"] == ESM_STATUS_ANSWERED
    return df_esm_preprocessed.loc[is_answered]

View File

@ -221,9 +221,15 @@ df_session_workday = df_session_workday.assign(
) )
# %%
# Distribution of the time differences between sessions, in 5-unit-wide bins
# (minutes, per the axis label).
g1 = sns.displot(
    df_session_workday["time_diff_minutes"],
    binwidth=5,
    height=5,
    aspect=1.5,
    color="#28827C",
)
g1.set_axis_labels("Time difference [min]", "Session count")
# g1.savefig("WorkdayEMAtimeDiff.pdf")
# %% [markdown]
# There are some sessions that are really close together. By design, none should be closer than 30 min. Let's take a look at those.
@ -278,9 +284,15 @@ df_mean_daytime_interval = df_session_workday.groupby("participant_id").median()
df_mean_daytime_interval.describe() df_mean_daytime_interval.describe()
# %%
# Distribution of the time_diff_minutes column of df_mean_daytime_interval
# (labelled as the per-participant median), in 5-unit-wide bins.
g2 = sns.displot(
    df_mean_daytime_interval.time_diff_minutes,
    binwidth=5,
    height=5,
    aspect=1.5,
    color="#28827C",
)
g2.set_axis_labels("Median time difference [min]", "Participant count")
# g2.savefig("WorkdayEMAtimeDiffMedianParticip.pdf")
# %% # %%
df_adherence = df_adherence.merge( df_adherence = df_adherence.merge(
@ -303,7 +315,13 @@ df_count_daytime_per_participant = df_session_workday.groupby(
df_count_daytime_per_participant["time"].describe() df_count_daytime_per_participant["time"].describe()
# %%
# Distribution of the `time` column of df_count_daytime_per_participant,
# one bin per unit.
sns.displot(
    df_count_daytime_per_participant.time,
    binwidth=1,
    height=5,
    aspect=1.5,
    color="#28827C",
)
# %% [markdown]
# ## Evening EMA
@ -334,10 +352,16 @@ s_evening_completed_ratio = (
s_evening_completed_ratio.describe() s_evening_completed_ratio.describe()
# %%
# NOTE(review): the 0.001 offset presumably keeps ratios of exactly 1.0 inside
# the top bin - confirm.
g3 = sns.displot(
    s_evening_completed_ratio - 0.001,
    binwidth=0.05,
    height=5,
    aspect=1.5,
    color="#28827C",
)
g3.set_axis_labels("Ratio of days with the evening EMA filled out", "Participant count")
# The limits are given high-to-low, so the x-axis runs from 1.01 down to 0.59.
g3.set(xlim=(1.01, 0.59))
# g3.savefig("EveningEMAratioParticip.pdf")
# %% # %%
df_adherence = df_adherence.merge( df_adherence = df_adherence.merge(