stress_at_work_analysis/features/esm.py

from collections.abc import Collection

import numpy as np
import pandas as pd

from config.models import ESM, Participant
from features import helper
from setup import db_engine, session

# Value of the esm_status column that this module treats as "answered".
ESM_STATUS_ANSWERED = 2

# Columns that together identify a single EMA session; used as groupby keys.
GROUP_SESSIONS_BY = ["participant_id", "device_id", "esm_session"]

# Labels written to the session_response column by classify_sessions_by_completion.
SESSION_STATUS_UNANSWERED = "ema_unanswered"
SESSION_STATUS_DAY_FINISHED = "day_finished"
SESSION_STATUS_COMPLETE = "ema_completed"

# Values of esm_user_answer that do not represent questionnaire answers.
ANSWER_DAY_FINISHED = "DayFinished3421"  # I finished working for today.
ANSWER_DAY_OFF = "DayOff3421"  # I am not going to work today.
ANSWER_SET_EVENING = "DayFinishedSetEvening"  # When to answer the evening EMA?

# Sessions labelled "morning" with more items than this are relabelled as "daytime"
# (see classify_sessions_by_completion_time).
MAX_MORNING_LENGTH = 3
# When the participant was not yet at work at the time of the first (morning) EMA,
# only three items were answered.
# Two sleep related items and one indicating NOT starting work yet.
# Daytime EMAs are all longer, in fact they always consist of at least 6 items.

# Mapping from questionnaire (scale) name to its numeric ID.
# NOTE(review): presumably these match the questionnaire ID stored with each ESM item;
# confirm against the ESM JSON definitions.
QUESTIONNAIRE_IDS = {
    "sleep_quality": 1,
    "PANAS_positive_affect": 8,
    "PANAS_negative_affect": 9,
    "JCQ_job_demand": 10,
    "JCQ_job_control": 11,
    "JCQ_supervisor_support": 12,
    "JCQ_coworker_support": 13,
    "PFITS_supervisor": 14,
    "PFITS_coworkers": 15,
    "UWES_vigor": 16,
    "UWES_dedication": 17,
    "UWES_absorption": 18,
    "COPE_active": 19,
    "COPE_support": 20,
    "COPE_emotions": 21,
    "balance_life_work": 22,
    "balance_work_life": 23,
    "recovery_experience_detachment": 24,
    "recovery_experience_relaxation": 25,
    "symptoms": 26,
    "appraisal_stressfulness_event": 87,
    "appraisal_threat": 88,
    "appraisal_challenge": 89,
    "appraisal_event_time": 90,
    "appraisal_event_duration": 91,
    "appraisal_event_work_related": 92,
    "appraisal_stressfulness_period": 93,
    "late_work": 94,
    "work_hours": 95,
    "left_work": 96,
    "activities": 97,
    "coffee_breaks": 98,
    "at_work_yet": 99,
}


def get_esm_data(usernames: Collection) -> pd.DataFrame:
    """
    Fetch the ESM rows of the selected participants from the database.

    Parameters
    ----------
    usernames: Collection
        Usernames used to restrict the query (the SQL ``IN`` condition).

    Returns
    -------
    df_esm: pd.DataFrame
        The ESM records together with the username of their participant.
    """
    all_esm = session.query(ESM, Participant.username)
    # Tie every ESM row to its participant, then keep the requested users only.
    esm_with_participant = all_esm.filter(Participant.id == ESM.participant_id)
    esm_of_users = esm_with_participant.filter(Participant.username.in_(usernames))

    with db_engine.connect() as connection:
        return pd.read_sql(esm_of_users.statement, connection)


def preprocess_esm(df_esm: pd.DataFrame) -> pd.DataFrame:
    """
    Convert timestamps and expand JSON column.

    Convert timestamps into human-readable datetimes and dates
        (delegated to helper.get_date_from_timestamp)
        and expand the JSON column into several Pandas DF columns.

    The expanded JSON fields are attached to the rows by position,
        so the result is correct even if df_esm does not carry
        the default 0..n-1 index (e.g. after filtering).
    The index of the input dataframe is preserved in the output.

    Parameters
    ----------
    df_esm: pd.DataFrame
        A dataframe of esm data, which must include the esm_json column.

    Returns
    -------
    df_esm_preprocessed: pd.DataFrame
        A dataframe with added columns: those added by the timestamp helper
            and all fields from ESM_JSON column (except esm_trigger).
    """
    df_esm = helper.get_date_from_timestamp(df_esm)

    # The esm_trigger column is already present in the main df.
    df_esm_json = pd.json_normalize(df_esm["esm_json"]).drop(columns=["esm_trigger"])

    # Depending on the pandas version, json_normalize numbers its rows 0..n-1
    # regardless of the index of its input. Joining on index labels would then
    # silently misalign the JSON fields for any non-default index,
    # so join by row position and restore the original index afterwards.
    df_esm_preprocessed = df_esm.reset_index(drop=True).join(
        df_esm_json.reset_index(drop=True)
    )
    df_esm_preprocessed.index = df_esm.index
    return df_esm_preprocessed


def classify_sessions_by_completion(df_esm_preprocessed: pd.DataFrame) -> pd.DataFrame:
    """
    For each distinct EMA session, determine how the participant responded to it.

    Possible outcomes are: SESSION_STATUS_UNANSWERED, SESSION_STATUS_DAY_FINISHED,
        SESSION_STATUS_COMPLETE, or (for single-item sessions) the ESM trigger.

    This is done in four steps.

    First, the esm_status is considered.
    If any of the ESMs in a session has a status *other than* "answered",
        then this session is taken as unfinished.

    Second, the sessions which do not represent full questionnaires are identified.
    These are sessions where participants only marked they are finished with the day
        or have not yet started working.
    This label overrides the one from the first step.

    Third, the sessions with only one item, which have not been labelled yet,
        are marked with their trigger.
    We never offered questionnaires with single items,
        so we can be sure these are unfinished.

    Finally, all sessions that remain are marked as completed.
    By going through different possibilities in expl_esm_adherence.ipynb,
        this turned out to be a reasonable option.

    Parameters
    ----------
    df_esm_preprocessed: pd.DataFrame
        A preprocessed dataframe of esm data,
            which must include the session ID (esm_session)
            as well as id, esm_status, esm_user_answer and esm_trigger.

    Returns
    -------
    df_session_counts: pd.Dataframe
        A dataframe of all sessions (grouped by GROUP_SESSIONS_BY)
            with their statuses (session_response)
            and the number of items (esm_session_count).
    """
    sessions_grouped = df_esm_preprocessed.groupby(GROUP_SESSIONS_BY)

    # 0. Count the items in each session and start with all statuses missing.
    # The column is created with object dtype, because it will hold strings;
    # writing strings into a float (NaN) column is deprecated in recent pandas.
    df_session_counts = sessions_grouped["id"].count().to_frame("esm_session_count")
    df_session_counts["session_response"] = pd.Series(
        np.nan, index=df_session_counts.index, dtype=object
    )

    # Item-level flags, reduced to one boolean per session with a single groupby
    # using the same keys (and therefore the same index) as the counts above.
    session_flags = (
        df_esm_preprocessed[GROUP_SESSIONS_BY]
        .assign(
            esm_not_answered=df_esm_preprocessed["esm_status"].ne(ESM_STATUS_ANSWERED),
            non_session=df_esm_preprocessed["esm_user_answer"].isin(
                [
                    ANSWER_DAY_FINISHED,  # I finished working for today.
                    ANSWER_DAY_OFF,  # I am not going to work today.
                    ANSWER_SET_EVENING,  # When would you like to answer the evening EMA?
                ]
            ),
        )
        .groupby(GROUP_SESSIONS_BY)[["esm_not_answered", "non_session"]]
        .any()
        .astype(bool)
    )

    # 1. Identify all sessions containing an ESM with status other than answered.
    df_session_counts.loc[
        session_flags["esm_not_answered"], "session_response"
    ] = SESSION_STATUS_UNANSWERED

    # 2. Identify non-sessions, i.e. answers about the end of the day.
    df_session_counts.loc[
        session_flags["non_session"], "session_response"
    ] = SESSION_STATUS_DAY_FINISHED

    # 3. Identify sessions appearing only once, as those were not true EMAs for sure.
    # They are labelled with the trigger of their only item.
    singleton_sessions = df_session_counts["esm_session_count"].eq(
        1
    ) & df_session_counts["session_response"].isna()
    session_trigger = sessions_grouped["esm_trigger"].first()
    df_session_counts.loc[
        singleton_sessions, "session_response"
    ] = session_trigger.loc[singleton_sessions]

    # 4. Mark the remaining sessions as completed.
    df_session_counts.loc[
        df_session_counts["session_response"].isna(), "session_response"
    ] = SESSION_STATUS_COMPLETE

    return df_session_counts


def classify_sessions_by_time(df_esm_preprocessed: pd.DataFrame) -> pd.DataFrame:
    """
    Determine the time type and the starting time of every EMA session.

    The rows are ordered chronologically within each participant.
    Then, for each session, the first available value of the time column
        (the time type) and of the datetime_lj column are taken.

    Parameters
    ----------
    df_esm_preprocessed: pd.DataFrame
        A preprocessed dataframe of esm data,
            which must include the session ID (esm_session)
            as well as the time and datetime_lj columns.

    Returns
    -------
    df_session_time: pd.DataFrame
        A dataframe with one row per session (indexed by GROUP_SESSIONS_BY)
            and two columns: time and datetime_lj.
    """
    chronological = df_esm_preprocessed.sort_values(
        by=["participant_id", "datetime_lj"]
    )
    # Groupby keeps the row order within each group, so "first" means "earliest".
    first_per_session = chronological.groupby(GROUP_SESSIONS_BY).first()
    return first_per_session[["time", "datetime_lj"]]


def classify_sessions_by_completion_time(
    df_esm_preprocessed: pd.DataFrame,
) -> pd.DataFrame:
    """
    Combine completion and time classification and fix the morning label.

    Both classify_sessions_by_completion and classify_sessions_by_time are run
        and their results are joined on the session index.

    In addition, the time type of some sessions is "corrected".
    A morning questionnaire could continue directly into a daytime questionnaire
        when the participant was already at work,
        so that the "time" label changed in the middle of the session.
    Since classify_sessions_by_time takes the label of the first item,
        such a session is labelled as "morning",
        although it should be treated as a "daytime" EMA.

    A true "morning" questionnaire (participant NOT yet at work) is short,
        so any "morning" session with more than MAX_MORNING_LENGTH items
        is relabelled as "daytime".

    Parameters
    ----------
    df_esm_preprocessed: pd.DataFrame
        A preprocessed dataframe of esm data,
            which must include the session ID (esm_session).

    Returns
    -------
    df_session_counts_time: pd.DataFrame
        A dataframe of all sessions (grouped by GROUP_SESSIONS_BY) with
            their time type (with some morning EMAs reclassified),
            the timestamp of the first answer,
            the number of items and the session status.

    """
    completion = classify_sessions_by_completion(df_esm_preprocessed)
    timing = classify_sessions_by_time(df_esm_preprocessed)
    sessions = timing.join(completion)

    labelled_morning = sessions["time"] == "morning"
    too_long_for_morning = sessions["esm_session_count"] > MAX_MORNING_LENGTH
    sessions.loc[labelled_morning & too_long_for_morning, "time"] = "daytime"

    return sessions


def clean_up_esm(df_esm_preprocessed: pd.DataFrame) -> pd.DataFrame:
    """
    Eliminate invalid ESM responses.

    It removes unanswered ESMs and those that indicate end of work and similar.
    It also extracts a numeric answer from strings such as "4 - I strongly agree".
    The whole leading integer is extracted, so "10 - ..." yields 10 (not 1).
    Answers of numeric types that do not start with an integer are left as NaN.

    The input dataframe is not modified; a filtered copy is returned.

    Parameters
    ----------
    df_esm_preprocessed: pd.DataFrame
        A preprocessed dataframe of esm data.

    Returns
    -------
    df_esm_clean: pd.DataFrame
        A subset of the rows of the original dataframe
            with an additional (float) column esm_user_answer_numeric.
            This is NaN for ESM types other than radio, scale, and number.

    """
    is_answered = df_esm_preprocessed["esm_status"] == ESM_STATUS_ANSWERED
    is_non_answer = df_esm_preprocessed["esm_user_answer"].isin(
        [ANSWER_DAY_FINISHED, ANSWER_DAY_OFF, ANSWER_SET_EVENING]
    )
    # Work on an explicit copy, so that adding a column below does not
    # operate on a slice of the caller's dataframe.
    df_esm_clean = df_esm_preprocessed[is_answered & ~is_non_answer].copy()

    df_esm_clean["esm_user_answer_numeric"] = np.nan
    esm_type_numeric = [
        ESM.ESM_TYPE.get("radio"),
        ESM.ESM_TYPE.get("scale"),
        ESM.ESM_TYPE.get("number"),
    ]
    is_numeric_type = df_esm_clean["esm_type"].isin(esm_type_numeric)

    # Take all leading digits (with an optional sign) rather than the first
    # character only, otherwise multi-digit answers would be truncated.
    leading_integer = (
        df_esm_clean.loc[is_numeric_type, "esm_user_answer"]
        .str.extract(r"^\s*(-?\d+)", expand=False)
        .astype(float)
    )
    df_esm_clean.loc[is_numeric_type, "esm_user_answer_numeric"] = leading_integer
    return df_esm_clean


def increment_answers(
    df_esm_clean: pd.DataFrame, increment_by: float = 1
) -> pd.DataFrame:
    """
    Increment answers to keep in line with original scoring.

    We always used 0 for the lowest value of user answer.
    Some scales originally used other scoring, such as starting from 1.
    This restores original scoring so that the values are comparable to references.

    The input dataframe is not modified; a new dataframe is returned.

    Parameters
    ----------
    df_esm_clean: pd.DataFrame
        A cleaned ESM dataframe, which must also include esm_user_answer_numeric.
    increment_by:
        A number to add to the user answer.

    Returns
    -------
    df_esm_clean: pd.DataFrame
        The same df with addition of a column 'esm_user_score',
            which equals esm_user_answer_numeric + increment_by.
        If esm_user_answer_numeric is missing, a message is printed
            and the dataframe is returned unchanged (without esm_user_score).

    """
    try:
        df_esm_clean = df_esm_clean.assign(
            esm_user_score=lambda x: x.esm_user_answer_numeric + increment_by
        )
    except AttributeError as e:
        # Attribute-style column access fails when the dataframe has not been
        # cleaned yet; report it and hand the input back untouched.
        print("Please, clean the dataframe first using features.esm.clean_up_esm.")
        print(e)
    return df_esm_clean
Add a method to transform ESM data and the test. 2021-05-27 18:10:34 +02:00			`from collections.abc import Collection`

[WIP] Prepare a function to classify adherence and illustrate steps in Jupyter Notebook. 2021-06-07 19:32:38 +02:00			`import numpy as np`
Add a method to transform ESM data and the test. 2021-05-27 18:10:34 +02:00			`import pandas as pd`

			`from config.models import ESM, Participant`
Use the same function for ESM and other data. 2021-08-11 17:26:44 +02:00			`from features import helper`
Add a method to transform ESM data and the test. 2021-05-27 18:10:34 +02:00			`from setup import db_engine, session`

Identify unique sessions and assign status. Use CONSTANT variables for these statuses. 2021-06-11 13:50:24 +02:00			`ESM_STATUS_ANSWERED = 2`

			`GROUP_SESSIONS_BY = ["participant_id", "device_id", "esm_session"]`

			`SESSION_STATUS_UNANSWERED = "ema_unanswered"`
			`SESSION_STATUS_DAY_FINISHED = "day_finished"`
			`SESSION_STATUS_COMPLETE = "ema_completed"`
Add a method to transform ESM data and the test. 2021-05-27 18:10:34 +02:00
Clean up ESM by eliminating non-answers. Convert radio string answer to numeric. 2021-07-03 16:34:11 +02:00			`ANSWER_DAY_FINISHED = "DayFinished3421"`
			`ANSWER_DAY_OFF = "DayOff3421"`
			`ANSWER_SET_EVENING = "DayFinishedSetEvening"`

Add a function to correct the morning questionnaires into daytime. 2021-06-11 16:44:33 +02:00			`MAX_MORNING_LENGTH = 3`
Define QUESTIONNAIRE IDs and use them. Clean up docstrings. 2023-07-03 17:09:15 +02:00			`# When the participant was not yet at work at the time of the first (morning) EMA,`
Add a function to correct the morning questionnaires into daytime. 2021-06-11 16:44:33 +02:00			`# only three items were answered.`
			`# Two sleep related items and one indicating NOT starting work yet.`
			`# Daytime EMAs are all longer, in fact they always consist of at least 6 items.`

Define QUESTIONNAIRE IDs and use them. Clean up docstrings. 2023-07-03 17:09:15 +02:00			`QUESTIONNAIRE_IDS = {`
			`"sleep_quality": 1,`
			`"PANAS_positive_affect": 8,`
			`"PANAS_negative_affect": 9,`
			`"JCQ_job_demand": 10,`
			`"JCQ_job_control": 11,`
			`"JCQ_supervisor_support": 12,`
			`"JCQ_coworker_support": 13,`
			`"PFITS_supervisor": 14,`
			`"PFITS_coworkers": 15,`
			`"UWES_vigor": 16,`
			`"UWES_dedication": 17,`
			`"UWES_absorption": 18,`
			`"COPE_active": 19,`
			`"COPE_support": 20,`
			`"COPE_emotions": 21,`
			`"balance_life_work": 22,`
			`"balance_work_life": 23,`
			`"recovery_experience_detachment": 24,`
			`"recovery_experience_relaxation": 25,`
			`"symptoms": 26,`
			`"appraisal_stressfulness_event": 87,`
			`"appraisal_threat": 88,`
			`"appraisal_challenge": 89,`
			`"appraisal_event_time": 90,`
			`"appraisal_event_duration": 91,`
			`"appraisal_event_work_related": 92,`
			`"appraisal_stressfulness_period": 93,`
			`"late_work": 94,`
			`"work_hours": 95,`
			`"left_work": 96,`
			`"activities": 97,`
			`"coffee_breaks": 98,`
			`"at_work_yet": 99,`
			`}`

Add a method to transform ESM data and the test. 2021-05-27 18:10:34 +02:00
			`def get_esm_data(usernames: Collection) -> pd.DataFrame:`
			`"""`
			`Read the data from the esm table and return it in a dataframe.`

			`Parameters`
			`----------`
			`usernames: Collection`
			`A list of usernames to put into the WHERE condition.`

			`Returns`
			`-------`
			`df_esm: pd.DataFrame`
Look at the ESM data and test JSON expansion. 2021-06-01 12:10:42 +02:00			`A dataframe of esm data.`
Add a method to transform ESM data and the test. 2021-05-27 18:10:34 +02:00			`"""`
			`query_esm = (`
			`session.query(ESM, Participant.username)`
			`.filter(Participant.id == ESM.participant_id)`
			`.filter(Participant.username.in_(usernames))`
			`)`
			`with db_engine.connect() as connection:`
			`df_esm = pd.read_sql(query_esm.statement, connection)`
			`return df_esm`


			`def preprocess_esm(df_esm: pd.DataFrame) -> pd.DataFrame:`
Document the preprocess_esm function. 2021-06-07 16:50:27 +02:00			`"""`
Define QUESTIONNAIRE IDs and use them. Clean up docstrings. 2023-07-03 17:09:15 +02:00			`Convert timestamps and expand JSON column.`

Add date part of datetime when preprocessing. 2021-07-03 16:39:32 +02:00			`Convert timestamps into human-readable datetimes and dates`
Define QUESTIONNAIRE IDs and use them. Clean up docstrings. 2023-07-03 17:09:15 +02:00			`and expand the JSON column into several Pandas DF columns.`
Document the preprocess_esm function. 2021-06-07 16:50:27 +02:00
			`Parameters`
			`----------`
			`df_esm: pd.DataFrame`
			`A dataframe of esm data.`

			`Returns`
			`-------`
			`df_esm_preprocessed: pd.DataFrame`
Define QUESTIONNAIRE IDs and use them. Clean up docstrings. 2023-07-03 17:09:15 +02:00			`A dataframe with added columns: datetime in Ljubljana timezone`
			`and all fields from ESM_JSON column.`
Document the preprocess_esm function. 2021-06-07 16:50:27 +02:00			`"""`
Move datetime calculation to a separate function. 2021-08-11 17:19:14 +02:00			`df_esm = helper.get_date_from_timestamp(df_esm)`

Study session ID in depth. 2021-06-02 18:35:00 +02:00			`df_esm_json = pd.json_normalize(df_esm["esm_json"]).drop(`
			`columns=["esm_trigger"]`
			`) # The esm_trigger column is already present in the main df.`
Expand ESM_JSON column and add esm example data. 2021-06-01 17:57:08 +02:00			`return df_esm.join(df_esm_json)`
[WIP] Prepare a function to classify adherence and illustrate steps in Jupyter Notebook. 2021-06-07 19:32:38 +02:00

Add a function to determine EMA session time. 2021-06-11 16:34:09 +02:00			`def classify_sessions_by_completion(df_esm_preprocessed: pd.DataFrame) -> pd.DataFrame:`
[WIP] Prepare a function to classify adherence and illustrate steps in Jupyter Notebook. 2021-06-07 19:32:38 +02:00			`"""`
			`For each distinct EMA session, determine how the participant responded to it.`
Define QUESTIONNAIRE IDs and use them. Clean up docstrings. 2023-07-03 17:09:15 +02:00
			`Possible outcomes are: SESSION_STATUS_UNANSWERED, SESSION_STATUS_DAY_FINISHED,`
			`and SESSION_STATUS_COMPLETE`
[WIP] Prepare a function to classify adherence and illustrate steps in Jupyter Notebook. 2021-06-07 19:32:38 +02:00
Finish labelling EMA sessions and document classify_sessions_adherence function. 2021-06-11 14:50:14 +02:00			`This is done in three steps.`

			`First, the esm_status is considered.`
Define QUESTIONNAIRE IDs and use them. Clean up docstrings. 2023-07-03 17:09:15 +02:00			`If any of the ESMs in a session has a status other than "answered",`
			`then this session is taken as unfinished.`
Finish labelling EMA sessions and document classify_sessions_adherence function. 2021-06-11 14:50:14 +02:00
			`Second, the sessions which do not represent full questionnaires are identified.`
Define QUESTIONNAIRE IDs and use them. Clean up docstrings. 2023-07-03 17:09:15 +02:00			`These are sessions where participants only marked they are finished with the day`
			`or have not yet started working.`
Finish labelling EMA sessions and document classify_sessions_adherence function. 2021-06-11 14:50:14 +02:00
			`Third, the sessions with only one item are marked with their trigger.`
Define QUESTIONNAIRE IDs and use them. Clean up docstrings. 2023-07-03 17:09:15 +02:00			`We never offered questionnaires with single items,`
			`so we can be sure these are unfinished.`
Finish labelling EMA sessions and document classify_sessions_adherence function. 2021-06-11 14:50:14 +02:00
			`Finally, all sessions that remain are marked as completed.`
Define QUESTIONNAIRE IDs and use them. Clean up docstrings. 2023-07-03 17:09:15 +02:00			`By going through different possibilities in expl_esm_adherence.ipynb,`
			`this turned out to be a reasonable option.`
[WIP] Prepare a function to classify adherence and illustrate steps in Jupyter Notebook. 2021-06-07 19:32:38 +02:00
			`Parameters`
			`----------`
			`df_esm_preprocessed: pd.DataFrame`
Define QUESTIONNAIRE IDs and use them. Clean up docstrings. 2023-07-03 17:09:15 +02:00			`A preprocessed dataframe of esm data,`
			`which must include the session ID (esm_session).`
[WIP] Prepare a function to classify adherence and illustrate steps in Jupyter Notebook. 2021-06-07 19:32:38 +02:00
			`Returns`
			`-------`
Finish labelling EMA sessions and document classify_sessions_adherence function. 2021-06-11 14:50:14 +02:00			`df_session_counts: pd.Dataframe`
Define QUESTIONNAIRE IDs and use them. Clean up docstrings. 2023-07-03 17:09:15 +02:00			`A dataframe of all sessions (grouped by GROUP_SESSIONS_BY)`
			`with their statuses and the number of items.`
[WIP] Prepare a function to classify adherence and illustrate steps in Jupyter Notebook. 2021-06-07 19:32:38 +02:00			`"""`
Identify unique sessions and assign status. Use CONSTANT variables for these statuses. 2021-06-11 13:50:24 +02:00			`sessions_grouped = df_esm_preprocessed.groupby(GROUP_SESSIONS_BY)`
[WIP] Prepare a function to classify adherence and illustrate steps in Jupyter Notebook. 2021-06-07 19:32:38 +02:00
Finish labelling EMA sessions and document classify_sessions_adherence function. 2021-06-11 14:50:14 +02:00			`# 0. First, assign all session statuses as NaN.`
[WIP] Prepare a function to classify adherence and illustrate steps in Jupyter Notebook. 2021-06-07 19:32:38 +02:00			`df_session_counts = pd.DataFrame(sessions_grouped.count()["id"]).rename(`
			`columns={"id": "esm_session_count"}`
			`)`
Only convert some answers to numeric. 2021-07-04 14:34:13 +02:00			`df_session_counts["session_response"] = np.nan`
[WIP] Prepare a function to classify adherence and illustrate steps in Jupyter Notebook. 2021-06-07 19:32:38 +02:00
Finish labelling EMA sessions and document classify_sessions_adherence function. 2021-06-11 14:50:14 +02:00			`# 1. Identify all ESMs with status other than answered.`
			`esm_not_answered = sessions_grouped.apply(`
			`lambda x: (x.esm_status != ESM_STATUS_ANSWERED).any()`
			`)`
			`df_session_counts.loc[`
			`esm_not_answered, "session_response"`
			`] = SESSION_STATUS_UNANSWERED`
[WIP] Prepare a function to classify adherence and illustrate steps in Jupyter Notebook. 2021-06-07 19:32:38 +02:00
Finish labelling EMA sessions and document classify_sessions_adherence function. 2021-06-11 14:50:14 +02:00			`# 2. Identify non-sessions, i.e. answers about the end of the day.`
[WIP] Prepare a function to classify adherence and illustrate steps in Jupyter Notebook. 2021-06-07 19:32:38 +02:00			`non_session = sessions_grouped.apply(`
			`lambda x: (`
Clean up ESM by eliminating non-answers. Convert radio string answer to numeric. 2021-07-03 16:34:11 +02:00			`(x.esm_user_answer == ANSWER_DAY_FINISHED) # I finished working for today.`
			`\| (x.esm_user_answer == ANSWER_DAY_OFF) # I am not going to work today.`
Finish labelling EMA sessions and document classify_sessions_adherence function. 2021-06-11 14:50:14 +02:00			`\| (`
Clean up ESM by eliminating non-answers. Convert radio string answer to numeric. 2021-07-03 16:34:11 +02:00			`x.esm_user_answer == ANSWER_SET_EVENING`
Finish labelling EMA sessions and document classify_sessions_adherence function. 2021-06-11 14:50:14 +02:00			`) # When would you like to answer the evening EMA?`
[WIP] Prepare a function to classify adherence and illustrate steps in Jupyter Notebook. 2021-06-07 19:32:38 +02:00			`).any()`
			`)`
Identify unique sessions and assign status. Use CONSTANT variables for these statuses. 2021-06-11 13:50:24 +02:00			`df_session_counts.loc[non_session, "session_response"] = SESSION_STATUS_DAY_FINISHED`

Finish labelling EMA sessions and document classify_sessions_adherence function. 2021-06-11 14:50:14 +02:00			`# 3. Identify sessions appearing only once, as those were not true EMAs for sure.`
			`singleton_sessions = (df_session_counts.esm_session_count == 1) & (`
			`df_session_counts.session_response.isna()`
			`)`
Identify unique sessions and assign status. Use CONSTANT variables for these statuses. 2021-06-11 13:50:24 +02:00			`df_session_1 = df_session_counts[singleton_sessions]`
			`df_esm_unique_session = df_session_1.join(`
			`df_esm_preprocessed.set_index(GROUP_SESSIONS_BY), how="left"`
			`)`
Finish labelling EMA sessions and document classify_sessions_adherence function. 2021-06-11 14:50:14 +02:00			`df_esm_unique_session = df_esm_unique_session.assign(`
			`session_response=lambda x: x.esm_trigger`
			`)["session_response"]`
			`df_session_counts.loc[`
			`df_esm_unique_session.index, "session_response"`
			`] = df_esm_unique_session`

			`# 4. Mark the remaining sessions as completed.`
			`df_session_counts.loc[`
			`df_session_counts.session_response.isna(), "session_response"`
			`] = SESSION_STATUS_COMPLETE`
[WIP] Start calculating concordance. Note, workday and morning EMAs have not been properly dealt with, but assumed answered. 2021-06-08 16:07:39 +02:00
			`return df_session_counts`
Add a function to determine EMA session time. 2021-06-11 16:34:09 +02:00

			`def classify_sessions_by_time(df_esm_preprocessed: pd.DataFrame) -> pd.DataFrame:`
			`"""`
Define QUESTIONNAIRE IDs and use them. Clean up docstrings. 2023-07-03 17:09:15 +02:00			`Classify EMA sessions into morning, workday, or evening.`

			`For each EMA session, determine the time of the first user answer`
			`and its time type (morning, workday, or evening).`
Add a function to determine EMA session time. 2021-06-11 16:34:09 +02:00
			`Parameters`
			`----------`
			`df_esm_preprocessed: pd.DataFrame`
Define QUESTIONNAIRE IDs and use them. Clean up docstrings. 2023-07-03 17:09:15 +02:00			`A preprocessed dataframe of esm data,`
			`which must include the session ID (esm_session).`
Add a function to determine EMA session time. 2021-06-11 16:34:09 +02:00
			`Returns`
			`-------`
			`df_session_time: pd.DataFrame`
Define QUESTIONNAIRE IDs and use them. Clean up docstrings. 2023-07-03 17:09:15 +02:00			`A dataframe of all sessions (grouped by GROUP_SESSIONS_BY)`
			`with their time type and timestamp of first answer.`
Add a function to determine EMA session time. 2021-06-11 16:34:09 +02:00			`"""`
			`df_session_time = (`
			`df_esm_preprocessed.sort_values(["participant_id", "datetime_lj"])`
			`.groupby(GROUP_SESSIONS_BY)`
			`.first()[["time", "datetime_lj"]]`
			`)`
			`return df_session_time`
Add a function to correct the morning questionnaires into daytime. 2021-06-11 16:44:33 +02:00

			`def classify_sessions_by_completion_time(`
			`df_esm_preprocessed: pd.DataFrame,`
			`) -> pd.DataFrame:`
			`"""`
Define QUESTIONNAIRE IDs and use them. Clean up docstrings. 2023-07-03 17:09:15 +02:00			`Classify sessions and correct the time type.`

			`The point of this function is to not only classify sessions`
			`by using the previously defined functions.`
Add a function to correct the morning questionnaires into daytime. 2021-06-11 16:44:33 +02:00			`It also serves to "correct" the time type of some EMA sessions.`

			`A morning questionnaire could seamlessly transition into a daytime questionnaire,`
			`if the participant was already at work.`
			`In this case, the "time" label changed mid-session.`
Define QUESTIONNAIRE IDs and use them. Clean up docstrings. 2023-07-03 17:09:15 +02:00			`Because of the way classify_sessions_by_time works,`
			`this questionnaire was classified as "morning".`
Add a function to correct the morning questionnaires into daytime. 2021-06-11 16:44:33 +02:00			`But for all intents and purposes, it can be treated as a "daytime" EMA.`

			`The way this scenario is differentiated from a true "morning" questionnaire,`
			`where the participants NOT yet at work, is by considering their length.`

			`Parameters`
			`----------`
			`df_esm_preprocessed: pd.DataFrame`
Define QUESTIONNAIRE IDs and use them. Clean up docstrings. 2023-07-03 17:09:15 +02:00			`A preprocessed dataframe of esm data,`
			`which must include the session ID (esm_session).`
Add a function to correct the morning questionnaires into daytime. 2021-06-11 16:44:33 +02:00
			`Returns`
			`-------`
Fix a bug where evening EMAs where also reclassified as daytime. 2021-06-11 20:17:17 +02:00			`df_session_counts_time: pd.DataFrame`
Define QUESTIONNAIRE IDs and use them. Clean up docstrings. 2023-07-03 17:09:15 +02:00			`A dataframe of all sessions (grouped by GROUP_SESSIONS_BY) with statuses,`
			`the number of items,`
			`their time type (with some morning EMAs reclassified)`
			`and timestamp of first answer.`
Add a function to correct the morning questionnaires into daytime. 2021-06-11 16:44:33 +02:00
			`"""`
			`df_session_counts = classify_sessions_by_completion(df_esm_preprocessed)`
			`df_session_time = classify_sessions_by_time(df_esm_preprocessed)`

			`df_session_counts_time = df_session_time.join(df_session_counts)`

Fix a bug where evening EMAs where also reclassified as daytime. 2021-06-11 20:17:17 +02:00			`morning_transition_to_daytime = (df_session_counts_time.time == "morning") & (`
			`df_session_counts_time.esm_session_count > MAX_MORNING_LENGTH`
			`)`

			`df_session_counts_time.loc[morning_transition_to_daytime, "time"] = "daytime"`
Add a function to correct the morning questionnaires into daytime. 2021-06-11 16:44:33 +02:00
			`return df_session_counts_time`
Start exploring PANAS data. Add a function to clean up ESM data. 2021-07-02 16:33:48 +02:00

			`def clean_up_esm(df_esm_preprocessed: pd.DataFrame) -> pd.DataFrame:`
			`"""`
Define QUESTIONNAIRE IDs and use them. Clean up docstrings. 2023-07-03 17:09:15 +02:00			`Eliminate invalid ESM responses.`

Implement a method to recode JCQ answers. 2021-07-05 18:24:22 +02:00			`It removes unanswered ESMs and those that indicate end of work and similar.`
			`It also extracts a numeric answer from strings such as "4 - I strongly agree".`
Start exploring PANAS data. Add a function to clean up ESM data. 2021-07-02 16:33:48 +02:00
			`Parameters`
			`----------`
			`df_esm_preprocessed: pd.DataFrame`
			`A preprocessed dataframe of esm data.`

			`Returns`
			`-------`
			`df_esm_clean: pd.DataFrame`
			`A subset of the original dataframe.`

			`"""`
			`df_esm_clean = df_esm_preprocessed[`
			`df_esm_preprocessed["esm_status"] == ESM_STATUS_ANSWERED`
			`]`
Clean up ESM by eliminating non-answers. Convert radio string answer to numeric. 2021-07-03 16:34:11 +02:00			`df_esm_clean = df_esm_clean[`
			`~df_esm_clean["esm_user_answer"].isin(`
			`[ANSWER_DAY_FINISHED, ANSWER_DAY_OFF, ANSWER_SET_EVENING]`
			`)`
			`]`
Only convert some answers to numeric. 2021-07-04 14:34:13 +02:00			`df_esm_clean["esm_user_answer_numeric"] = np.nan`
Fix formatting. 2021-07-04 14:34:57 +02:00			`esm_type_numeric = [`
			`ESM.ESM_TYPE.get("radio"),`
			`ESM.ESM_TYPE.get("scale"),`
			`ESM.ESM_TYPE.get("number"),`
			`]`
Fix assignment to use loc. For assigning a value to selected rows (a subset), regular slicing using [] produces a KeyError. 2021-08-11 14:53:59 +02:00			`df_esm_clean.loc[`
Fix formatting. 2021-07-04 14:34:57 +02:00			`df_esm_clean["esm_type"].isin(esm_type_numeric)`
Fix assignment to use loc. For assigning a value to selected rows (a subset), regular slicing using [] produces a KeyError. 2021-08-11 14:53:59 +02:00			`] = df_esm_clean.loc[df_esm_clean["esm_type"].isin(esm_type_numeric)].assign(`
Clean up ESM by eliminating non-answers. Convert radio string answer to numeric. 2021-07-03 16:34:11 +02:00			`esm_user_answer_numeric=lambda x: x.esm_user_answer.str.slice(stop=1).astype(`
			`int`
			`)`
			`)`
Start exploring PANAS data. Add a function to clean up ESM data. 2021-07-02 16:33:48 +02:00			`return df_esm_clean`
Extract method to reuse and simplify. 2023-07-03 20:52:08 +02:00

			`def increment_answers(df_esm_clean: pd.DataFrame, increment_by=1):`
			`"""`
			`Increment answers to keep in line with original scoring.`

			`We always used 0 for the lowest value of user answer.`
			`Some scales originally used other scoring, such as starting from 1.`
			`This restores original scoring so that the values are comparable to references.`

			`Parameters`
			`----------`
			`df_esm_clean: pd.DataFrame`
			`A cleaned ESM dataframe, which must also include esm_user_answer_numeric.`
			`increment_by:`
			`A number to add to the user answer.`

			`Returns`
			`-------`
			`df_esm_clean: pd.DataFrame`
			`The same df with addition of a column 'esm_user_answer_numeric'.`

			`"""`
			`try:`
			`df_esm_clean = df_esm_clean.assign(`
			`esm_user_score=lambda x: x.esm_user_answer_numeric + increment_by`
			`)`
			`except AttributeError as e:`
			`print("Please, clean the dataframe first using features.esm.clean_up_esm.")`
			`print(e)`
			`return df_esm_clean`