2021-07-05 18:32:35 +02:00
# -*- coding: utf-8 -*-
2021-07-02 16:33:48 +02:00
# ---
# jupyter:
# jupytext:
# formats: ipynb,py:percent
# text_representation:
# extension: .py
# format_name: percent
# format_version: '1.3'
2022-08-23 16:41:41 +02:00
# jupytext_version: 1.13.0
2021-07-02 16:33:48 +02:00
# kernelspec:
# display_name: straw2analysis
# language: python
# name: straw2analysis
# ---
# %%
import os
import sys
2022-08-23 16:41:41 +02:00
import datetime
2021-07-02 16:33:48 +02:00
import seaborn as sns
# Put the parent of the current working directory on the import path so the
# project packages imported below can be resolved.
nb_dir = os.path.dirname(os.getcwd())
if nb_dir not in sys.path:
    sys.path.append(nb_dir)
import participants . query_db
from features . esm import *
2021-07-05 18:32:35 +02:00
from features . esm_JCQ import *
2022-08-23 16:41:41 +02:00
from features . esm_SAM import *
2021-07-02 16:33:48 +02:00
# %%
# Query usernames using 1 August 2020 as the collection start date, then load
# the ESM data for those usernames.
# NOTE(review): the exact meaning of `collection_start` is defined in
# participants.query_db -- confirm there.
participants_inactive_usernames = participants.query_db.get_usernames(
    collection_start=datetime.date(2020, 8, 1)
)
df_esm_inactive = get_esm_data(participants_inactive_usernames)

# %%
# Run the shared preprocessing step on the raw ESM data.
df_esm_preprocessed = preprocess_esm(df_esm_inactive)
2021-07-03 16:34:11 +02:00
# %% [markdown]
# # PANAS
2021-07-02 16:33:48 +02:00
# %%
2021-07-03 16:34:11 +02:00
# Keep only the rows belonging to questionnaires 8 and 9 (the two PANAS
# subscales), then apply the common ESM clean-up.
df_esm_PANAS = df_esm_preprocessed[
    df_esm_preprocessed["questionnaire_id"].isin([8, 9])
]
df_esm_PANAS_clean = clean_up_esm(df_esm_PANAS)
2021-07-02 16:33:48 +02:00
2021-07-04 13:41:34 +02:00
# %% [markdown]
# Group by participants, date, and subscale and calculate daily means.
2021-07-02 16:33:48 +02:00
# %%
2021-07-04 13:41:34 +02:00
# One row per participant, day and questionnaire, holding the mean of the
# numeric answers given on that day.
df_esm_PANAS_daily_means = (
    df_esm_PANAS_clean.groupby(["participant_id", "date_lj", "questionnaire_id"])[
        "esm_user_answer_numeric"
    ]
    .mean()
    .reset_index(name="esm_numeric_mean")
)
# %% [markdown]
2021-07-04 13:41:34 +02:00
# Next, calculate mean, median, and standard deviation across all days for each participant.
2021-07-03 18:46:06 +02:00
# %%
# Summarise the daily means per participant and questionnaire.
# Only the value column is aggregated: running agg() over the whole frame would
# also try to aggregate `date_lj`, which either fails or produces duplicate
# "mean"/"median"/"std" column names depending on its dtype and pandas version.
df_esm_PANAS_summary_participant = (
    df_esm_PANAS_daily_means.groupby(["participant_id", "questionnaire_id"])[
        "esm_numeric_mean"
    ]
    .agg(["mean", "median", "std"])
    .reset_index()
)
# Label each summary row from its OWN questionnaire_id. Deriving the labels from
# df_esm_PANAS_daily_means would align a different frame on the row index and
# can attach the wrong subscale to a row.
df_esm_PANAS_summary_participant["PANAS_subscale"] = (
    df_esm_PANAS_summary_participant["questionnaire_id"]
    .astype("category")
    .cat.rename_categories({8.0: "PA", 9.0: "NA"})
)
2021-07-04 13:41:34 +02:00
# %%
# Distribution of the per-participant mean of daily PANAS scores, by subscale.
sns.displot(
    df_esm_PANAS_summary_participant,
    x="mean",
    hue="PANAS_subscale",
    binwidth=0.2,
)

# %%
# Same view for the per-participant median.
sns.displot(
    df_esm_PANAS_summary_participant,
    x="median",
    hue="PANAS_subscale",
    binwidth=0.2,
)

# %%
# Day-to-day variability (standard deviation) per participant and subscale.
sns.displot(
    df_esm_PANAS_summary_participant,
    x="std",
    hue="PANAS_subscale",
    binwidth=0.05,
)

# %%
# Show the rows whose standard deviation across days is below 0.1.
df_esm_PANAS_summary_participant.loc[
    df_esm_PANAS_summary_participant["std"].lt(0.1)
]
2021-07-04 14:34:57 +02:00
# %% [markdown]
# # Stress appraisal measure
2022-08-23 16:41:41 +02:00
# %%
# Stressful events extracted by the project helper from the raw ESM data.
df_SAM_all = extract_stressful_events(df_esm_inactive)

# %%
df_SAM_all.head()

# %%
# Questionnaires 87 through 93 (inclusive) are treated as the SAM items here;
# select them and apply the common ESM clean-up.
df_esm_SAM = df_esm_preprocessed[
    df_esm_preprocessed["questionnaire_id"].between(87, 93)
]
df_esm_SAM_clean = clean_up_esm(df_esm_SAM)
# %% [markdown]
# ## Stressful events
# %%
# Questionnaire 87 is the stressful-event question; flag an answer as a
# stressful event when its numeric value is positive.
df_esm_SAM_event = df_esm_SAM_clean.loc[
    df_esm_SAM_clean["questionnaire_id"].eq(87)
].assign(
    stressful_event=lambda answers: answers["esm_user_answer_numeric"].gt(0)
)

# %%
# Share of flagged answers per participant and day.
df_esm_SAM_daily_events = (
    df_esm_SAM_event.groupby(["participant_id", "date_lj"])["stressful_event"]
    .mean()
    .reset_index(name="SAM_event_ratio")
)
# %% [markdown]
# Calculate the daily mean of YES (1) or NO (0) answers to the question about stressful events. This is the daily ratio of EMA sessions that included a stressful event.
# %%
# Per-participant mean, median and standard deviation of the daily event ratio.
# The ratio column is selected explicitly so that `date_lj` is not aggregated
# along with it; the result then has flat columns and needs no column-level
# flattening afterwards.
df_esm_SAM_event_summary_participant = (
    df_esm_SAM_daily_events.groupby(["participant_id"])["SAM_event_ratio"]
    .agg(["mean", "median", "std"])
    .reset_index()
)
# %%
# Distribution of each participant's mean daily stressful-event ratio.
sns.displot(
    df_esm_SAM_event_summary_participant,
    x="mean",
    binwidth=0.1,
)

# %%
# Distribution of the day-to-day variability of that ratio.
sns.displot(
    df_esm_SAM_event_summary_participant,
    x="std",
    binwidth=0.05,
)
# %% [markdown]
# ### Threat and challenge
# %% [markdown]
# * Example of threat: "Did this event make you feel anxious?"
# * Example of challenge: "How eager are you to tackle this event?"
# * Possible answers: 0 - Not at all, 1 - Slightly, 2 - Moderately, 3 - Considerably, 4 - Extremely
# %%
# Daily mean of the numeric answers per participant and SAM questionnaire.
df_esm_SAM_daily = (
    df_esm_SAM_clean.groupby(["participant_id", "date_lj", "questionnaire_id"])[
        "esm_user_answer_numeric"
    ]
    .mean()
    .reset_index(name="esm_numeric_mean")
)

# %%
# Daily values restricted to questionnaires 88 and 89 (threat and challenge).
df_esm_SAM_daily_threat_challenge = df_esm_SAM_daily[
    df_esm_SAM_daily["questionnaire_id"].isin([88, 89])
]
# %%
# Per-participant, per-questionnaire summary of the daily SAM means.
# Aggregating only `esm_numeric_mean` keeps `date_lj` out of the statistics and
# yields flat "mean"/"median"/"std" columns directly.
df_esm_SAM_summary_participant = (
    df_esm_SAM_daily.groupby(["participant_id", "questionnaire_id"])[
        "esm_numeric_mean"
    ]
    .agg(["mean", "median", "std"])
    .reset_index()
)
2021-07-04 14:34:57 +02:00
# %%
2021-07-04 16:29:53 +02:00
# Summary rows for questionnaires 88 (threat) and 89 (challenge), with a
# readable subscale label. The label is added with .assign() on the filtered
# result rather than by item assignment on a slice of the summary frame, which
# would trigger pandas' SettingWithCopyWarning.
df_esm_SAM_threat_challenge_summary_participant = df_esm_SAM_summary_participant[
    df_esm_SAM_summary_participant["questionnaire_id"].isin([88, 89])
].assign(
    event_subscale=lambda summary: summary["questionnaire_id"]
    .astype("category")
    .cat.rename_categories({88: "threat", 89: "challenge"})
)
# %%
# Distribution of per-participant mean threat and challenge appraisals.
sns.displot(
    df_esm_SAM_threat_challenge_summary_participant,
    x="mean",
    hue="event_subscale",
    binwidth=0.2,
)

# %%
# Day-to-day variability of threat and challenge appraisals.
sns.displot(
    df_esm_SAM_threat_challenge_summary_participant,
    x="std",
    hue="event_subscale",
    binwidth=0.1,
)
# %% [markdown]
# ## Stressfulness of period
# %%
# Summary rows for questionnaire 93 (stressfulness of the period).
df_esm_SAM_period_summary_participant = df_esm_SAM_summary_participant.loc[
    df_esm_SAM_summary_participant["questionnaire_id"].eq(93)
]

# %%
# Distribution of the per-participant mean.
sns.displot(
    df_esm_SAM_period_summary_participant,
    x="mean",
    binwidth=0.2,
)

# %%
# Distribution of the per-participant standard deviation across days.
sns.displot(
    df_esm_SAM_period_summary_participant,
    x="std",
    binwidth=0.1,
)
2021-07-05 18:32:35 +02:00
# %% [markdown]
# # Job demand and control
# %%
# Questionnaires 10 and 11 hold the job demand and job control items; select
# them and apply the common ESM clean-up.
df_esm_JCQ_demand_control = df_esm_preprocessed[
    df_esm_preprocessed["questionnaire_id"].between(10, 11)
]
df_esm_JCQ_demand_control_clean = clean_up_esm(df_esm_JCQ_demand_control)

# %%
# Apply the project's reverse-scoring helper for the JCQ items.
df_esm_JCQ_demand_control_reversed = reverse_jcq_demand_control_scoring(
    df_esm_JCQ_demand_control_clean
)
# %%
# Daily mean JCQ score per participant and questionnaire.
df_esm_JCQ_daily = (
    df_esm_JCQ_demand_control_reversed.groupby(
        ["participant_id", "date_lj", "questionnaire_id"]
    )
    .esm_user_score.agg("mean")
    .reset_index()
    .rename(columns={"esm_user_score": "esm_score_mean"})
)
# Per-participant summary across days. Only the score column is aggregated, so
# `date_lj` is not fed into mean/median/std and the resulting columns are
# already flat.
df_esm_JCQ_summary_participant = (
    df_esm_JCQ_daily.groupby(["participant_id", "questionnaire_id"])[
        "esm_score_mean"
    ]
    .agg(["mean", "median", "std"])
    .reset_index()
)
# Readable subscale label derived from the summary's own questionnaire_id.
df_esm_JCQ_summary_participant["JCQ_subscale"] = (
    df_esm_JCQ_summary_participant["questionnaire_id"]
    .astype("category")
    .cat.rename_categories({10: "job demand", 11: "job control"})
)
# %%
# Distribution of per-participant mean job demand and job control scores.
sns.displot(
    df_esm_JCQ_summary_participant,
    x="mean",
    hue="JCQ_subscale",
    binwidth=0.1,
)

# %%
# Day-to-day variability of job demand and job control scores.
sns.displot(
    df_esm_JCQ_summary_participant,
    x="std",
    hue="JCQ_subscale",
    binwidth=0.05,
)