parent
577f1330da
commit
ae2ca63bc4
5
.flake8
5
.flake8
|
@ -1,6 +1,9 @@
|
||||||
[flake8]
|
[flake8]
|
||||||
max-line-length = 88
|
max-line-length = 88
|
||||||
extend-ignore = E203
|
extend-ignore =
|
||||||
|
E203,
|
||||||
|
# E501 line too long for docstrings
|
||||||
|
D501
|
||||||
per-file-ignores =
|
per-file-ignores =
|
||||||
exploration/*.py:E501
|
exploration/*.py:E501
|
||||||
docstring-convention = numpy
|
docstring-convention = numpy
|
||||||
|
|
|
@ -16,7 +16,6 @@ dependencies:
|
||||||
- pandas
|
- pandas
|
||||||
- psycopg2 >= 2.9.1
|
- psycopg2 >= 2.9.1
|
||||||
- pre-commit
|
- pre-commit
|
||||||
- pydocstyle
|
|
||||||
- python-dotenv
|
- python-dotenv
|
||||||
- pytz
|
- pytz
|
||||||
- pyprojroot
|
- pyprojroot
|
||||||
|
|
|
@ -20,7 +20,7 @@ import datetime
|
||||||
import seaborn as sns
|
import seaborn as sns
|
||||||
|
|
||||||
import participants.query_db
|
import participants.query_db
|
||||||
from features.esm import clean_up_esm, get_esm_data, preprocess_esm
|
from features.esm import QUESTIONNAIRE_IDS, clean_up_esm, get_esm_data, preprocess_esm
|
||||||
from features.esm_JCQ import reverse_jcq_demand_control_scoring
|
from features.esm_JCQ import reverse_jcq_demand_control_scoring
|
||||||
from features.esm_SAM import extract_stressful_events
|
from features.esm_SAM import extract_stressful_events
|
||||||
|
|
||||||
|
@ -48,8 +48,14 @@ df_esm_preprocessed = preprocess_esm(df_esm_inactive)
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
df_esm_PANAS = df_esm_preprocessed[
|
df_esm_PANAS = df_esm_preprocessed[
|
||||||
(df_esm_preprocessed["questionnaire_id"] == 8)
|
(
|
||||||
| (df_esm_preprocessed["questionnaire_id"] == 9)
|
df_esm_preprocessed["questionnaire_id"]
|
||||||
|
== QUESTIONNAIRE_IDS["PANAS_positive_affect"]
|
||||||
|
)
|
||||||
|
| (
|
||||||
|
df_esm_preprocessed["questionnaire_id"]
|
||||||
|
== QUESTIONNAIRE_IDS["PANAS_negative_affect"]
|
||||||
|
)
|
||||||
]
|
]
|
||||||
df_esm_PANAS_clean = clean_up_esm(df_esm_PANAS)
|
df_esm_PANAS_clean = clean_up_esm(df_esm_PANAS)
|
||||||
|
|
||||||
|
@ -126,8 +132,14 @@ df_SAM_all.head()
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
df_esm_SAM = df_esm_preprocessed[
|
df_esm_SAM = df_esm_preprocessed[
|
||||||
(df_esm_preprocessed["questionnaire_id"] >= 87)
|
(
|
||||||
& (df_esm_preprocessed["questionnaire_id"] <= 93)
|
df_esm_preprocessed["questionnaire_id"]
|
||||||
|
>= QUESTIONNAIRE_IDS["appraisal_stressfulness_event"]
|
||||||
|
)
|
||||||
|
& (
|
||||||
|
df_esm_preprocessed["questionnaire_id"]
|
||||||
|
<= QUESTIONNAIRE_IDS["appraisal_stressfulness_period"]
|
||||||
|
)
|
||||||
]
|
]
|
||||||
df_esm_SAM_clean = clean_up_esm(df_esm_SAM)
|
df_esm_SAM_clean = clean_up_esm(df_esm_SAM)
|
||||||
|
|
||||||
|
@ -135,9 +147,10 @@ df_esm_SAM_clean = clean_up_esm(df_esm_SAM)
|
||||||
# ## Stressful events
|
# ## Stressful events
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
df_esm_SAM_event = df_esm_SAM_clean[df_esm_SAM_clean["questionnaire_id"] == 87].assign(
|
df_esm_SAM_event = df_esm_SAM_clean[
|
||||||
stressful_event=lambda x: (x.esm_user_answer_numeric > 0)
|
df_esm_SAM_clean["questionnaire_id"]
|
||||||
)
|
== QUESTIONNAIRE_IDS["appraisal_stressfulness_event"]
|
||||||
|
].assign(stressful_event=lambda x: (x.esm_user_answer_numeric > 0))
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
df_esm_SAM_daily_events = (
|
df_esm_SAM_daily_events = (
|
||||||
|
@ -191,8 +204,8 @@ df_esm_SAM_daily = (
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
df_esm_SAM_daily_threat_challenge = df_esm_SAM_daily[
|
df_esm_SAM_daily_threat_challenge = df_esm_SAM_daily[
|
||||||
(df_esm_SAM_daily["questionnaire_id"] == 88)
|
(df_esm_SAM_daily["questionnaire_id"] == QUESTIONNAIRE_IDS["appraisal_threat"])
|
||||||
| (df_esm_SAM_daily["questionnaire_id"] == 89)
|
| (df_esm_SAM_daily["questionnaire_id"] == QUESTIONNAIRE_IDS["appraisal_challenge"])
|
||||||
]
|
]
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
|
@ -204,7 +217,8 @@ df_esm_SAM_summary_participant = (
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
df_esm_SAM_event_stressfulness_summary_participant = df_esm_SAM_summary_participant[
|
df_esm_SAM_event_stressfulness_summary_participant = df_esm_SAM_summary_participant[
|
||||||
df_esm_SAM_summary_participant["questionnaire_id"] == 87
|
df_esm_SAM_summary_participant["questionnaire_id"]
|
||||||
|
== QUESTIONNAIRE_IDS["appraisal_stressfulness_event"]
|
||||||
]
|
]
|
||||||
df_esm_SAM_event_stressfulness_summary_participant.describe()["mean"]
|
df_esm_SAM_event_stressfulness_summary_participant.describe()["mean"]
|
||||||
|
|
||||||
|
@ -218,8 +232,8 @@ sns.displot(
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
df_esm_SAM_threat_challenge_summary_participant = df_esm_SAM_summary_participant[
|
df_esm_SAM_threat_challenge_summary_participant = df_esm_SAM_summary_participant[
|
||||||
(df_esm_SAM_summary_participant["questionnaire_id"] == 88)
|
(df_esm_SAM_daily["questionnaire_id"] == QUESTIONNAIRE_IDS["appraisal_threat"])
|
||||||
| (df_esm_SAM_summary_participant["questionnaire_id"] == 89)
|
| (df_esm_SAM_daily["questionnaire_id"] == QUESTIONNAIRE_IDS["appraisal_challenge"])
|
||||||
]
|
]
|
||||||
df_esm_SAM_threat_challenge_summary_participant[
|
df_esm_SAM_threat_challenge_summary_participant[
|
||||||
"event subscale"
|
"event subscale"
|
||||||
|
@ -263,7 +277,8 @@ df_esm_SAM_threat_challenge_summary_participant.groupby("event subscale").descri
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
df_esm_SAM_period_summary_participant = df_esm_SAM_summary_participant[
|
df_esm_SAM_period_summary_participant = df_esm_SAM_summary_participant[
|
||||||
df_esm_SAM_summary_participant["questionnaire_id"] == 93
|
df_esm_SAM_summary_participant["questionnaire_id"]
|
||||||
|
== QUESTIONNAIRE_IDS["appraisal_stressfulness_period"]
|
||||||
]
|
]
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
|
@ -283,8 +298,8 @@ sns.displot(data=df_esm_SAM_period_summary_participant, x="std", binwidth=0.1)
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
df_esm_JCQ_demand_control = df_esm_preprocessed[
|
df_esm_JCQ_demand_control = df_esm_preprocessed[
|
||||||
(df_esm_preprocessed["questionnaire_id"] >= 10)
|
(df_esm_preprocessed["questionnaire_id"] >= QUESTIONNAIRE_IDS["JCQ_job_demand"])
|
||||||
& (df_esm_preprocessed["questionnaire_id"] <= 11)
|
& (df_esm_preprocessed["questionnaire_id"] <= QUESTIONNAIRE_IDS["JCQ_job_control"])
|
||||||
]
|
]
|
||||||
df_esm_JCQ_demand_control_clean = clean_up_esm(df_esm_JCQ_demand_control)
|
df_esm_JCQ_demand_control_clean = clean_up_esm(df_esm_JCQ_demand_control)
|
||||||
|
|
||||||
|
@ -343,4 +358,11 @@ fig6.set_axis_labels(x_var="participant standard deviation", y_var="frequency")
|
||||||
if save_figs:
|
if save_figs:
|
||||||
fig5.figure.savefig("JCQ_std_participant.pdf", dpi=300)
|
fig5.figure.savefig("JCQ_std_participant.pdf", dpi=300)
|
||||||
|
|
||||||
|
# %% [markdown]
|
||||||
|
# # COPE Inventory
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
|
df_esm_COPE = df_esm_preprocessed[
|
||||||
|
(df_esm_preprocessed["questionnaire_id"] >= QUESTIONNAIRE_IDS["COPE_active"])
|
||||||
|
& (df_esm_preprocessed["questionnaire_id"] <= QUESTIONNAIRE_IDS["COPE_emotions"])
|
||||||
|
]
|
||||||
|
|
|
@ -20,11 +20,47 @@ ANSWER_DAY_OFF = "DayOff3421"
|
||||||
ANSWER_SET_EVENING = "DayFinishedSetEvening"
|
ANSWER_SET_EVENING = "DayFinishedSetEvening"
|
||||||
|
|
||||||
MAX_MORNING_LENGTH = 3
|
MAX_MORNING_LENGTH = 3
|
||||||
# When the participants was not yet at work at the time of the first (morning) EMA,
|
# When the participant was not yet at work at the time of the first (morning) EMA,
|
||||||
# only three items were answered.
|
# only three items were answered.
|
||||||
# Two sleep related items and one indicating NOT starting work yet.
|
# Two sleep related items and one indicating NOT starting work yet.
|
||||||
# Daytime EMAs are all longer, in fact they always consist of at least 6 items.
|
# Daytime EMAs are all longer, in fact they always consist of at least 6 items.
|
||||||
|
|
||||||
|
QUESTIONNAIRE_IDS = {
|
||||||
|
"sleep_quality": 1,
|
||||||
|
"PANAS_positive_affect": 8,
|
||||||
|
"PANAS_negative_affect": 9,
|
||||||
|
"JCQ_job_demand": 10,
|
||||||
|
"JCQ_job_control": 11,
|
||||||
|
"JCQ_supervisor_support": 12,
|
||||||
|
"JCQ_coworker_support": 13,
|
||||||
|
"PFITS_supervisor": 14,
|
||||||
|
"PFITS_coworkers": 15,
|
||||||
|
"UWES_vigor": 16,
|
||||||
|
"UWES_dedication": 17,
|
||||||
|
"UWES_absorption": 18,
|
||||||
|
"COPE_active": 19,
|
||||||
|
"COPE_support": 20,
|
||||||
|
"COPE_emotions": 21,
|
||||||
|
"balance_life_work": 22,
|
||||||
|
"balance_work_life": 23,
|
||||||
|
"recovery_experience_detachment": 24,
|
||||||
|
"recovery_experience_relaxation": 25,
|
||||||
|
"symptoms": 26,
|
||||||
|
"appraisal_stressfulness_event": 87,
|
||||||
|
"appraisal_threat": 88,
|
||||||
|
"appraisal_challenge": 89,
|
||||||
|
"appraisal_event_time": 90,
|
||||||
|
"appraisal_event_duration": 91,
|
||||||
|
"appraisal_event_work_related": 92,
|
||||||
|
"appraisal_stressfulness_period": 93,
|
||||||
|
"late_work": 94,
|
||||||
|
"work_hours": 95,
|
||||||
|
"left_work": 96,
|
||||||
|
"activities": 97,
|
||||||
|
"coffee_breaks": 98,
|
||||||
|
"at_work_yet": 99,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
def get_esm_data(usernames: Collection) -> pd.DataFrame:
|
def get_esm_data(usernames: Collection) -> pd.DataFrame:
|
||||||
"""
|
"""
|
||||||
|
@ -52,8 +88,10 @@ def get_esm_data(usernames: Collection) -> pd.DataFrame:
|
||||||
|
|
||||||
def preprocess_esm(df_esm: pd.DataFrame) -> pd.DataFrame:
|
def preprocess_esm(df_esm: pd.DataFrame) -> pd.DataFrame:
|
||||||
"""
|
"""
|
||||||
|
Convert timestamps and expand JSON column.
|
||||||
|
|
||||||
Convert timestamps into human-readable datetimes and dates
|
Convert timestamps into human-readable datetimes and dates
|
||||||
and expand the JSON column into several Pandas DF columns.
|
and expand the JSON column into several Pandas DF columns.
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
|
@ -63,7 +101,8 @@ def preprocess_esm(df_esm: pd.DataFrame) -> pd.DataFrame:
|
||||||
Returns
|
Returns
|
||||||
-------
|
-------
|
||||||
df_esm_preprocessed: pd.DataFrame
|
df_esm_preprocessed: pd.DataFrame
|
||||||
A dataframe with added columns: datetime in Ljubljana timezone and all fields from ESM_JSON column.
|
A dataframe with added columns: datetime in Ljubljana timezone
|
||||||
|
and all fields from ESM_JSON column.
|
||||||
"""
|
"""
|
||||||
df_esm = helper.get_date_from_timestamp(df_esm)
|
df_esm = helper.get_date_from_timestamp(df_esm)
|
||||||
|
|
||||||
|
@ -76,31 +115,39 @@ def preprocess_esm(df_esm: pd.DataFrame) -> pd.DataFrame:
|
||||||
def classify_sessions_by_completion(df_esm_preprocessed: pd.DataFrame) -> pd.DataFrame:
|
def classify_sessions_by_completion(df_esm_preprocessed: pd.DataFrame) -> pd.DataFrame:
|
||||||
"""
|
"""
|
||||||
For each distinct EMA session, determine how the participant responded to it.
|
For each distinct EMA session, determine how the participant responded to it.
|
||||||
Possible outcomes are: SESSION_STATUS_UNANSWERED, SESSION_STATUS_DAY_FINISHED, and SESSION_STATUS_COMPLETE
|
|
||||||
|
Possible outcomes are: SESSION_STATUS_UNANSWERED, SESSION_STATUS_DAY_FINISHED,
|
||||||
|
and SESSION_STATUS_COMPLETE
|
||||||
|
|
||||||
This is done in three steps.
|
This is done in three steps.
|
||||||
|
|
||||||
First, the esm_status is considered.
|
First, the esm_status is considered.
|
||||||
If any of the ESMs in a session has a status *other than* "answered", then this session is taken as unfinished.
|
If any of the ESMs in a session has a status *other than* "answered",
|
||||||
|
then this session is taken as unfinished.
|
||||||
|
|
||||||
Second, the sessions which do not represent full questionnaires are identified.
|
Second, the sessions which do not represent full questionnaires are identified.
|
||||||
These are sessions where participants only marked they are finished with the day or have not yet started working.
|
These are sessions where participants only marked they are finished with the day
|
||||||
|
or have not yet started working.
|
||||||
|
|
||||||
Third, the sessions with only one item are marked with their trigger.
|
Third, the sessions with only one item are marked with their trigger.
|
||||||
We never offered questionnaires with single items, so we can be sure these are unfinished.
|
We never offered questionnaires with single items,
|
||||||
|
so we can be sure these are unfinished.
|
||||||
|
|
||||||
Finally, all sessions that remain are marked as completed.
|
Finally, all sessions that remain are marked as completed.
|
||||||
By going through different possibilities in expl_esm_adherence.ipynb, this turned out to be a reasonable option.
|
By going through different possibilities in expl_esm_adherence.ipynb,
|
||||||
|
this turned out to be a reasonable option.
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
df_esm_preprocessed: pd.DataFrame
|
df_esm_preprocessed: pd.DataFrame
|
||||||
A preprocessed dataframe of esm data, which must include the session ID (esm_session).
|
A preprocessed dataframe of esm data,
|
||||||
|
which must include the session ID (esm_session).
|
||||||
|
|
||||||
Returns
|
Returns
|
||||||
-------
|
-------
|
||||||
df_session_counts: pd.DataFrame
|
df_session_counts: pd.DataFrame
|
||||||
A dataframe of all sessions (grouped by GROUP_SESSIONS_BY) with their statuses and the number of items.
|
A dataframe of all sessions (grouped by GROUP_SESSIONS_BY)
|
||||||
|
with their statuses and the number of items.
|
||||||
"""
|
"""
|
||||||
sessions_grouped = df_esm_preprocessed.groupby(GROUP_SESSIONS_BY)
|
sessions_grouped = df_esm_preprocessed.groupby(GROUP_SESSIONS_BY)
|
||||||
|
|
||||||
|
@ -155,17 +202,22 @@ def classify_sessions_by_completion(df_esm_preprocessed: pd.DataFrame) -> pd.Dat
|
||||||
|
|
||||||
def classify_sessions_by_time(df_esm_preprocessed: pd.DataFrame) -> pd.DataFrame:
|
def classify_sessions_by_time(df_esm_preprocessed: pd.DataFrame) -> pd.DataFrame:
|
||||||
"""
|
"""
|
||||||
For each EMA session, determine the time of the first user answer and its time type (morning, workday, or evening.)
|
Classify EMA sessions into morning, workday, or evening.
|
||||||
|
|
||||||
|
For each EMA session, determine the time of the first user answer
|
||||||
|
and its time type (morning, workday, or evening).
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
df_esm_preprocessed: pd.DataFrame
|
df_esm_preprocessed: pd.DataFrame
|
||||||
A preprocessed dataframe of esm data, which must include the session ID (esm_session).
|
A preprocessed dataframe of esm data,
|
||||||
|
which must include the session ID (esm_session).
|
||||||
|
|
||||||
Returns
|
Returns
|
||||||
-------
|
-------
|
||||||
df_session_time: pd.DataFrame
|
df_session_time: pd.DataFrame
|
||||||
A dataframe of all sessions (grouped by GROUP_SESSIONS_BY) with their time type and timestamp of first answer.
|
A dataframe of all sessions (grouped by GROUP_SESSIONS_BY)
|
||||||
|
with their time type and timestamp of first answer.
|
||||||
"""
|
"""
|
||||||
df_session_time = (
|
df_session_time = (
|
||||||
df_esm_preprocessed.sort_values(["participant_id", "datetime_lj"])
|
df_esm_preprocessed.sort_values(["participant_id", "datetime_lj"])
|
||||||
|
@ -179,13 +231,17 @@ def classify_sessions_by_completion_time(
|
||||||
df_esm_preprocessed: pd.DataFrame,
|
df_esm_preprocessed: pd.DataFrame,
|
||||||
) -> pd.DataFrame:
|
) -> pd.DataFrame:
|
||||||
"""
|
"""
|
||||||
The point of this function is to not only classify sessions by using the previously defined functions.
|
Classify sessions and correct the time type.
|
||||||
|
|
||||||
|
The point of this function is to not only classify sessions
|
||||||
|
by using the previously defined functions.
|
||||||
It also serves to "correct" the time type of some EMA sessions.
|
It also serves to "correct" the time type of some EMA sessions.
|
||||||
|
|
||||||
A morning questionnaire could seamlessly transition into a daytime questionnaire,
|
A morning questionnaire could seamlessly transition into a daytime questionnaire,
|
||||||
if the participant was already at work.
|
if the participant was already at work.
|
||||||
In this case, the "time" label changed mid-session.
|
In this case, the "time" label changed mid-session.
|
||||||
Because of the way classify_sessions_by_time works, this questionnaire was classified as "morning".
|
Because of the way classify_sessions_by_time works,
|
||||||
|
this questionnaire was classified as "morning".
|
||||||
But for all intents and purposes, it can be treated as a "daytime" EMA.
|
But for all intents and purposes, it can be treated as a "daytime" EMA.
|
||||||
|
|
||||||
The way this scenario is differentiated from a true "morning" questionnaire,
|
The way this scenario is differentiated from a true "morning" questionnaire,
|
||||||
|
@ -194,13 +250,16 @@ def classify_sessions_by_completion_time(
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
df_esm_preprocessed: pd.DataFrame
|
df_esm_preprocessed: pd.DataFrame
|
||||||
A preprocessed dataframe of esm data, which must include the session ID (esm_session).
|
A preprocessed dataframe of esm data,
|
||||||
|
which must include the session ID (esm_session).
|
||||||
|
|
||||||
Returns
|
Returns
|
||||||
-------
|
-------
|
||||||
df_session_counts_time: pd.DataFrame
|
df_session_counts_time: pd.DataFrame
|
||||||
A dataframe of all sessions (grouped by GROUP_SESSIONS_BY) with statuses, the number of items,
|
A dataframe of all sessions (grouped by GROUP_SESSIONS_BY) with statuses,
|
||||||
their time type (with some morning EMAs reclassified) and timestamp of first answer.
|
the number of items,
|
||||||
|
their time type (with some morning EMAs reclassified)
|
||||||
|
and timestamp of first answer.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
df_session_counts = classify_sessions_by_completion(df_esm_preprocessed)
|
df_session_counts = classify_sessions_by_completion(df_esm_preprocessed)
|
||||||
|
@ -219,7 +278,8 @@ def classify_sessions_by_completion_time(
|
||||||
|
|
||||||
def clean_up_esm(df_esm_preprocessed: pd.DataFrame) -> pd.DataFrame:
|
def clean_up_esm(df_esm_preprocessed: pd.DataFrame) -> pd.DataFrame:
|
||||||
"""
|
"""
|
||||||
This function eliminates invalid ESM responses.
|
Eliminate invalid ESM responses.
|
||||||
|
|
||||||
It removes unanswered ESMs and those that indicate end of work and similar.
|
It removes unanswered ESMs and those that indicate end of work and similar.
|
||||||
It also extracts a numeric answer from strings such as "4 - I strongly agree".
|
It also extracts a numeric answer from strings such as "4 - I strongly agree".
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue