2021-07-05 18:32:35 +02:00
# -*- coding: utf-8 -*-
2021-07-02 16:33:48 +02:00
# ---
# jupyter:
# jupytext:
# formats: ipynb,py:percent
# text_representation:
# extension: .py
# format_name: percent
# format_version: '1.3'
2023-05-16 16:13:22 +02:00
# jupytext_version: 1.14.5
2021-07-02 16:33:48 +02:00
# kernelspec:
# display_name: straw2analysis
# language: python
# name: straw2analysis
# ---
# %%
2022-08-23 16:41:41 +02:00
import datetime
2021-07-02 16:33:48 +02:00
import seaborn as sns
import participants . query_db
2023-07-03 17:09:15 +02:00
from features . esm import QUESTIONNAIRE_IDS , clean_up_esm , get_esm_data , preprocess_esm
2023-05-16 16:13:22 +02:00
from features . esm_JCQ import reverse_jcq_demand_control_scoring
from features . esm_SAM import extract_stressful_events
# import os
# import sys
# nb_dir = os.path.split(os.getcwd())[0]
# if nb_dir not in sys.path:
# sys.path.append(nb_dir)
# %%
2023-07-03 14:50:35 +02:00
# Notebook-wide switches: save_figs guards the savefig calls below and
# export_data guards the CSV export cells.
save_figs = False
export_data = True

# %%
# Usernames are requested with a collection start of 1 August 2020; how that
# date is applied is decided inside participants.query_db.get_usernames.
participants_inactive_usernames = participants.query_db.get_usernames(
    collection_start=datetime.date(2020, 8, 1)
)
df_esm_inactive = get_esm_data(participants_inactive_usernames)

# %%
df_esm_preprocessed = preprocess_esm(df_esm_inactive)
2021-07-03 16:34:11 +02:00
# %% [markdown]
# # PANAS
2021-07-02 16:33:48 +02:00
# %%
2021-07-03 16:34:11 +02:00
# Keep only the rows that belong to one of the two PANAS questionnaires
# (positive or negative affect), then pass them through clean_up_esm.
df_esm_PANAS = df_esm_preprocessed[
    df_esm_preprocessed["questionnaire_id"].isin(
        [
            QUESTIONNAIRE_IDS["PANAS_positive_affect"],
            QUESTIONNAIRE_IDS["PANAS_negative_affect"],
        ]
    )
]
df_esm_PANAS_clean = clean_up_esm(df_esm_PANAS)
2021-07-02 16:33:48 +02:00
2021-07-04 13:41:34 +02:00
# %% [markdown]
# Group by participants, date, and subscale and calculate daily means.
2021-07-02 16:33:48 +02:00
# %%
2021-07-04 13:41:34 +02:00
# One row per (participant, date, questionnaire) holding the mean numeric
# answer, stored in the column "esm_numeric_mean".
df_esm_PANAS_daily_means = (
    df_esm_PANAS_clean.groupby(["participant_id", "date_lj", "questionnaire_id"])[
        "esm_user_answer_numeric"
    ]
    .mean()
    .rename("esm_numeric_mean")
    .reset_index()
)
# %% [markdown]
2023-05-16 16:37:34 +02:00
# Next, calculate mean, median, and standard deviation across all days for each participant.
2021-07-03 18:46:06 +02:00
# %%
# Per-participant, per-questionnaire mean, median and standard deviation of
# the daily means.
df_esm_PANAS_summary_participant = (
    df_esm_PANAS_daily_means.groupby(["participant_id", "questionnaire_id"])
    .esm_numeric_mean.agg(["mean", "median", "std"])
    .reset_index(col_level=1)
)
# Derive the subscale label from the summary's OWN questionnaire_id column.
# Taking it from df_esm_PANAS_daily_means would align on the row index of a
# frame with a different row layout (one row per day instead of one row per
# participant) and could attach the wrong label to a summary row.
df_esm_PANAS_summary_participant["PANAS subscale"] = (
    df_esm_PANAS_summary_participant.questionnaire_id.astype(
        "category"
    ).cat.rename_categories(
        {
            QUESTIONNAIRE_IDS["PANAS_positive_affect"]: "positive affect",
            QUESTIONNAIRE_IDS["PANAS_negative_affect"]: "negative affect",
        }
    )
)
2023-07-03 14:50:35 +02:00
# %%
# Descriptive statistics of the participant-level means, per subscale.
df_esm_PANAS_summary_participant.groupby("PANAS subscale")["mean"].describe()

# %%
# Descriptive statistics of the participant-level standard deviations.
df_esm_PANAS_summary_participant.groupby("PANAS subscale")["std"].describe()

# %%
# Rows whose daily means did not vary at all.
df_esm_PANAS_summary_participant[df_esm_PANAS_summary_participant["std"] == 0]
2021-07-04 13:41:34 +02:00
# %%
2023-05-16 16:13:22 +02:00
# Histogram of participant means, coloured by PANAS subscale.
fig1 = sns.displot(
    data=df_esm_PANAS_summary_participant,
    x="mean",
    hue="PANAS subscale",
    binwidth=0.2,
)
fig1.set_axis_labels("participant mean", "frequency")
if save_figs:
    fig1.figure.savefig("PANAS_mean_participant.pdf", dpi=300)

# %%
# Same view for the participant medians (not saved to disk).
sns.displot(
    df_esm_PANAS_summary_participant, x="median", hue="PANAS subscale", binwidth=0.2
)

# %%
# Histogram of participant standard deviations, coloured by PANAS subscale.
fig2 = sns.displot(
    data=df_esm_PANAS_summary_participant,
    x="std",
    hue="PANAS subscale",
    binwidth=0.05,
)
fig2.set_axis_labels("participant standard deviation", "frequency")
if save_figs:
    fig2.figure.savefig("PANAS_std_participant.pdf", dpi=300)

# %%
# Rows with a standard deviation below 0.1.
df_esm_PANAS_summary_participant.query("std < 0.1")
2021-07-04 14:34:57 +02:00
# %% [markdown]
# # Stress appraisal measure
2022-08-23 16:41:41 +02:00
# %%
df_SAM_all = extract_stressful_events(df_esm_inactive)

# %%
df_SAM_all.head()

# %%
# Select every questionnaire whose ID lies in the closed interval from
# "appraisal_stressfulness_event" to "appraisal_stressfulness_period",
# then pass the selection through clean_up_esm.
df_esm_SAM = df_esm_preprocessed[
    df_esm_preprocessed["questionnaire_id"].between(
        QUESTIONNAIRE_IDS["appraisal_stressfulness_event"],
        QUESTIONNAIRE_IDS["appraisal_stressfulness_period"],
    )
]
df_esm_SAM_clean = clean_up_esm(df_esm_SAM)
# %% [markdown]
# ## Stressful events
# %%
2023-07-03 17:09:15 +02:00
# Answers to the event-stressfulness question; an answer counts as a
# stressful event when its numeric value is greater than zero.
df_esm_SAM_event = df_esm_SAM_clean.loc[
    lambda df: df["questionnaire_id"]
    == QUESTIONNAIRE_IDS["appraisal_stressfulness_event"]
].assign(stressful_event=lambda df: df["esm_user_answer_numeric"].gt(0))

# %%
# Mean of the boolean flag per participant and day, stored in the column
# "SAM_event_ratio".
df_esm_SAM_daily_events = (
    df_esm_SAM_event.groupby(["participant_id", "date_lj"])["stressful_event"]
    .mean()
    .rename("SAM_event_ratio")
    .reset_index()
)
# %% [markdown]
2023-05-16 16:37:34 +02:00
# Calculate the daily mean of YES (1) or NO (0) answers to the question about stressful events. This is then the daily ratio of EMA sessions that included a stressful event.
2021-07-04 16:29:53 +02:00
# %%
# Per-participant mean, median and standard deviation of the daily ratios.
df_esm_SAM_event_summary_participant = (
    df_esm_SAM_daily_events.groupby(["participant_id"])["SAM_event_ratio"]
    .agg(["mean", "median", "std"])
    .reset_index(col_level=1)
)

# %%
# Histogram of the participant means of the daily event ratio.
fig6 = sns.displot(
    data=df_esm_SAM_event_summary_participant,
    x="mean",
    binwidth=0.1,
)
fig6.set_axis_labels("participant proportion of stressful events", "frequency")
if save_figs:
    fig6.figure.savefig("SAM_events_mean_participant.pdf", dpi=300)

# %%
# Histogram of the participant standard deviations (not saved to disk).
sns.displot(df_esm_SAM_event_summary_participant, x="std", binwidth=0.05)
# %% [markdown]
# ### Threat and challenge
# %% [markdown]
# * Example of threat: "Did this event make you feel anxious?"
# * Example of challenge: "How eager are you to tackle this event?"
2023-05-16 16:13:22 +02:00
# * Possible answers:
# 0 - Not at all,
# 1 - Slightly,
# 2 - Moderately,
# 3 - Considerably,
# 4 - Extremely
2021-07-04 16:29:53 +02:00
# %%
# Daily mean numeric answer per participant and SAM questionnaire.
df_esm_SAM_daily = (
    df_esm_SAM_clean.groupby(["participant_id", "date_lj", "questionnaire_id"])[
        "esm_user_answer_numeric"
    ]
    .mean()
    .rename("esm_numeric_mean")
    .reset_index()
)

# %%
# Daily means restricted to the threat and challenge questionnaires.
df_esm_SAM_daily_threat_challenge = df_esm_SAM_daily[
    df_esm_SAM_daily["questionnaire_id"].isin(
        [
            QUESTIONNAIRE_IDS["appraisal_threat"],
            QUESTIONNAIRE_IDS["appraisal_challenge"],
        ]
    )
]

# %%
# Per-participant summary statistics over ALL SAM questionnaires
# (computed from df_esm_SAM_daily, not from the threat/challenge subset).
df_esm_SAM_summary_participant = (
    df_esm_SAM_daily.groupby(["participant_id", "questionnaire_id"])[
        "esm_numeric_mean"
    ]
    .agg(["mean", "median", "std"])
    .reset_index(col_level=1)
)
2021-07-04 14:34:57 +02:00
2023-07-03 14:50:35 +02:00
# %%
# Participant summaries for the event-stressfulness questionnaire only.
df_esm_SAM_event_stressfulness_summary_participant = (
    df_esm_SAM_summary_participant.loc[
        df_esm_SAM_summary_participant["questionnaire_id"]
        == QUESTIONNAIRE_IDS["appraisal_stressfulness_event"]
    ]
)
df_esm_SAM_event_stressfulness_summary_participant["mean"].describe()

# %%
df_esm_SAM_event_stressfulness_summary_participant["std"].describe()

# %%
# Histogram of the participant means.
sns.displot(
    df_esm_SAM_event_stressfulness_summary_participant,
    x="mean",
    binwidth=0.2,
)
2021-07-04 14:34:57 +02:00
# %%
2021-07-04 16:29:53 +02:00
# Participant summaries for the threat and challenge questionnaires.
# The explicit .copy() makes this an independent frame, so the label column
# added below is not written to a selection of df_esm_SAM_summary_participant
# (which pandas reports as SettingWithCopyWarning).
df_esm_SAM_threat_challenge_summary_participant = df_esm_SAM_summary_participant[
    (
        df_esm_SAM_summary_participant["questionnaire_id"]
        == QUESTIONNAIRE_IDS["appraisal_threat"]
    )
    | (
        df_esm_SAM_summary_participant["questionnaire_id"]
        == QUESTIONNAIRE_IDS["appraisal_challenge"]
    )
].copy()
# Human-readable subscale label; the keys come from QUESTIONNAIRE_IDS so they
# always match the IDs used in the selection above.
df_esm_SAM_threat_challenge_summary_participant["event subscale"] = (
    df_esm_SAM_threat_challenge_summary_participant.questionnaire_id.astype(
        "category"
    ).cat.rename_categories(
        {
            QUESTIONNAIRE_IDS["appraisal_threat"]: "threat",
            QUESTIONNAIRE_IDS["appraisal_challenge"]: "challenge",
        }
    )
)
# %%
# Histogram of participant means, coloured by event subscale (not saved).
sns.displot(
    df_esm_SAM_threat_challenge_summary_participant,
    x="mean",
    hue="event subscale",
    binwidth=0.2,
)

# %%
# Histogram of participant standard deviations, coloured by event subscale.
fig3 = sns.displot(
    df_esm_SAM_threat_challenge_summary_participant,
    x="std",
    hue="event subscale",
    binwidth=0.1,
)
fig3.set_axis_labels("participant standard deviation", "frequency")
if save_figs:
    fig3.figure.savefig("SAM_std_participant.pdf", dpi=300)

# %%
# Descriptive statistics of the participant means, per subscale.
df_esm_SAM_threat_challenge_summary_participant.groupby("event subscale")[
    "mean"
].describe()

# %%
# Descriptive statistics of the participant standard deviations, per subscale.
df_esm_SAM_threat_challenge_summary_participant.groupby("event subscale")[
    "std"
].describe()
2023-07-03 18:25:40 +02:00
# %%
# Quick look at the available columns before choosing what to export.
df_esm_SAM_clean.columns

# %%
# Frequency of each esm_status value in the cleaned SAM data.
df_esm_SAM_clean["esm_status"].value_counts()
# %%
if export_data:
    # Export a fixed subset of columns from the cleaned SAM data. The sort is
    # chained and returns a new frame; sorting a column selection in place
    # makes pandas emit SettingWithCopyWarning.
    df_esm_SAM_for_export = df_esm_SAM_clean[
        [
            "participant_id",
            "username",
            "device_id",
            "_id",
            "esm_trigger",
            "esm_session",
            "esm_notification_id",
            "question_id",
            "questionnaire_id",
            "double_esm_user_answer_timestamp",
            "datetime_lj",
            "date_lj",
            "time",
            "esm_user_answer",
            "esm_user_answer_numeric",
        ]
    ].sort_values(by=["participant_id", "device_id", "_id"], ignore_index=True)
    print(df_esm_SAM_for_export.head())
    # NOTE(review): despite the file name, this frame holds every SAM
    # questionnaire selected into df_esm_SAM_clean, not only threat/challenge.
    df_esm_SAM_for_export.to_csv(
        "../data/raw/df_esm_SAM_threat_challenge.csv", index=False
    )
2021-07-04 16:29:53 +02:00
# %% [markdown]
# ## Stressfulness of period
# %%
# Participant summaries for the period-stressfulness questionnaire only.
df_esm_SAM_period_summary_participant = df_esm_SAM_summary_participant.loc[
    df_esm_SAM_summary_participant["questionnaire_id"]
    == QUESTIONNAIRE_IDS["appraisal_stressfulness_period"]
]

# %%
df_esm_SAM_period_summary_participant["mean"].describe()

# %%
df_esm_SAM_period_summary_participant["std"].describe()

# %%
# Histogram of the participant means.
sns.displot(df_esm_SAM_period_summary_participant, x="mean", binwidth=0.2)

# %%
# Histogram of the participant standard deviations.
sns.displot(df_esm_SAM_period_summary_participant, x="std", binwidth=0.1)
2021-07-05 18:32:35 +02:00
# %% [markdown]
# # Job demand and control
# %%
# Select every questionnaire whose ID lies in the closed interval from
# "JCQ_job_demand" to "JCQ_job_control", then pass it through clean_up_esm.
df_esm_JCQ_demand_control = df_esm_preprocessed[
    df_esm_preprocessed["questionnaire_id"].between(
        QUESTIONNAIRE_IDS["JCQ_job_demand"],
        QUESTIONNAIRE_IDS["JCQ_job_control"],
    )
]
df_esm_JCQ_demand_control_clean = clean_up_esm(df_esm_JCQ_demand_control)

# %%
# The returned frame is expected to carry an "esm_user_score" column, which
# the daily aggregation of the JCQ data reads.
df_esm_JCQ_demand_control_reversed = reverse_jcq_demand_control_scoring(
    df_esm_JCQ_demand_control_clean
)
# %%
# Daily mean score per participant and JCQ questionnaire.
df_esm_JCQ_daily = (
    df_esm_JCQ_demand_control_reversed.groupby(
        ["participant_id", "date_lj", "questionnaire_id"]
    )["esm_user_score"]
    .mean()
    .rename("esm_score_mean")
    .reset_index()
)
# Per-participant mean, median and standard deviation of the daily scores.
df_esm_JCQ_summary_participant = (
    df_esm_JCQ_daily.groupby(["participant_id", "questionnaire_id"])[
        "esm_score_mean"
    ]
    .agg(["mean", "median", "std"])
    .reset_index(col_level=1)
)
# Human-readable subscale label derived from the questionnaire ID.
df_esm_JCQ_summary_participant["JCQ subscale"] = (
    df_esm_JCQ_summary_participant["questionnaire_id"]
    .astype("category")
    .cat.rename_categories({10: "job demand", 11: "job control"})
)

# %%
# Descriptive statistics of the participant means, per subscale.
df_esm_JCQ_summary_participant.groupby("JCQ subscale")["mean"].describe()

# %%
# Descriptive statistics of the participant standard deviations, per subscale.
df_esm_JCQ_summary_participant.groupby("JCQ subscale")["std"].describe()
2021-07-05 18:32:35 +02:00
# %%
2023-05-16 16:13:22 +02:00
# Histogram of participant means, coloured by JCQ subscale.
fig4 = sns.displot(
    df_esm_JCQ_summary_participant, x="mean", hue="JCQ subscale", binwidth=0.1
)
fig4.set_axis_labels("participant mean", "frequency")
if save_figs:
    fig4.figure.savefig("JCQ_mean_participant.pdf", dpi=300)
2021-07-05 18:32:35 +02:00
# %%
2023-05-16 16:13:22 +02:00
# Histogram of participant standard deviations, coloured by JCQ subscale.
fig5 = sns.displot(
    data=df_esm_JCQ_summary_participant,
    x="std",
    hue="JCQ subscale",
    binwidth=0.05,
)
# The labels belong on fig5, the figure created and saved in this cell;
# setting them on fig6 would relabel the SAM stressful-events figure and
# leave this one without axis labels.
fig5.set_axis_labels(x_var="participant standard deviation", y_var="frequency")
if save_figs:
    fig5.figure.savefig("JCQ_std_participant.pdf", dpi=300)
2023-05-17 16:32:27 +02:00
2023-07-03 17:09:15 +02:00
# %% [markdown]
# # COPE Inventory
2023-05-17 16:32:27 +02:00
# %%
2023-07-03 17:09:15 +02:00
# Select every questionnaire whose ID lies in the closed interval from
# "COPE_active" to "COPE_emotions".
df_esm_COPE = df_esm_preprocessed[
    df_esm_preprocessed["questionnaire_id"].between(
        QUESTIONNAIRE_IDS["COPE_active"],
        QUESTIONNAIRE_IDS["COPE_emotions"],
    )
]

# %%
df_esm_COPE_clean = clean_up_esm(df_esm_COPE)
# %%
if export_data:
    # Export a fixed subset of columns from the cleaned COPE data. The sort is
    # chained and returns a new frame; sorting a column selection in place
    # makes pandas emit SettingWithCopyWarning.
    df_esm_COPE_for_export = df_esm_COPE_clean[
        [
            "participant_id",
            "username",
            "device_id",
            "_id",
            "esm_trigger",
            "esm_session",
            "esm_notification_id",
            "question_id",
            "questionnaire_id",
            "double_esm_user_answer_timestamp",
            "datetime_lj",
            "date_lj",
            "time",
            "esm_user_answer",
            "esm_user_answer_numeric",
        ]
    ].sort_values(by=["participant_id", "device_id", "_id"], ignore_index=True)
    print(df_esm_COPE_for_export.head())
    df_esm_COPE_for_export.to_csv("../data/raw/df_esm_COPE.csv", index=False)