# stress_at_work_analysis/features/esm.py

import datetime
from collections.abc import Collection

import numpy as np
import pandas as pd
from pytz import timezone

from config.models import ESM, Participant
from setup import db_engine, session

TZ_LJ = timezone("Europe/Ljubljana")


def get_esm_data(usernames: Collection) -> pd.DataFrame:
    """
    Fetch the ESM rows of the selected participants into a dataframe.

    The query selects the ESM entity together with Participant.username,
    matching the two on the participant ID and keeping only the rows
    whose username is among the requested ones.

    Parameters
    ----------
    usernames: Collection
        The usernames used in the WHERE ... IN condition.

    Returns
    -------
    df_esm: pd.DataFrame
        The result of the query as read by pandas.
    """
    esm_with_username = session.query(ESM, Participant.username)
    # Pair each ESM row with the participant it belongs to ...
    esm_of_participants = esm_with_username.filter(
        Participant.id == ESM.participant_id
    )
    # ... and keep only the requested participants.
    esm_of_selected = esm_of_participants.filter(
        Participant.username.in_(usernames)
    )
    with db_engine.connect() as connection:
        return pd.read_sql(esm_of_selected.statement, connection)


def preprocess_esm(df_esm: pd.DataFrame) -> pd.DataFrame:
    """
    Convert timestamps into human-readable datetimes and expand the JSON column into several Pandas DF columns.

    The JSON fields are attached to the rows they were read from by position,
    so the function also works when df_esm does not have a default 0..n-1 index
    (e.g. after filtering or concatenating dataframes). The index of df_esm
    is kept in the returned dataframe.

    Note that the datetime_lj column is also added to the passed df_esm in place.

    Parameters
    ----------
    df_esm: pd.DataFrame
        A dataframe of esm data. It must contain the columns
        double_esm_user_answer_timestamp (divided by 1000 here, i.e. treated as milliseconds)
        and esm_json (one JSON object per row).

    Returns
    -------
    df_esm_preprocessed: pd.DataFrame
        A dataframe with added columns: datetime in Ljubljana timezone and all fields from ESM_JSON column.

    Raises
    ------
    ValueError
        Raised by the join if an expanded JSON field (other than esm_trigger)
        has the same name as an existing column of df_esm.
    """
    df_esm["datetime_lj"] = df_esm["double_esm_user_answer_timestamp"].apply(
        lambda x: datetime.datetime.fromtimestamp(x / 1000.0, tz=TZ_LJ)
    )
    # Passing a plain list guarantees a fresh 0..n-1 index on the expanded frame.
    df_esm_json = pd.json_normalize(df_esm["esm_json"].tolist()).drop(
        columns=["esm_trigger"], errors="ignore"
    )  # The esm_trigger column is already present in the main df.
    # errors="ignore" keeps empty input (no JSON columns at all) from raising.

    # Join by row position rather than by index label, then restore the original index.
    df_esm_preprocessed = df_esm.reset_index(drop=True).join(df_esm_json)
    df_esm_preprocessed.index = df_esm.index
    return df_esm_preprocessed


def classify_sessions_adherence(df_esm_preprocessed: pd.DataFrame) -> pd.DataFrame:
    """
    For each distinct EMA session, determine how the participant responded to it.

    A session is a distinct combination of participant_id, device_id and esm_session.
    Possible outcomes are: esm_unanswered, day_finished, and esm_finished.

    This is done in several steps, where each later step overrides the earlier ones:

    1. esm_unanswered: at least one row of the session has esm_status other than 2.
    2. day_finished: at least one answer in the session is DayFinished3421,
       DayOff3421, or DayFinishedSetEvening.
    3. esm_finished: at least one esm_trigger in the session ends with "_last".

    Sessions that match none of the above are, for now, also labelled esm_finished.
    #TODO Finish the documentation.

    Parameters
    ----------
    df_esm_preprocessed: pd.DataFrame
        A preprocessed dataframe of esm data, which must include the session ID (esm_session).
        The columns id, participant_id, device_id, esm_status, esm_user_answer,
        and esm_trigger are used as well. The dataframe is not modified.

    Returns
    -------
    df_session_counts: pd.DataFrame
        A dataframe indexed by (participant_id, device_id, esm_session) with two columns:
        esm_session_count (the number of non-null ids in the session)
        and session_response (one of the outcomes listed above).
    """
    session_keys = ["participant_id", "device_id", "esm_session"]
    day_finished_answers = [
        "DayFinished3421",  # I finished working for today.
        "DayOff3421",  # I am not going to work today.
        "DayFinishedSetEvening",  # When would you like to answer the evening EMA?
    ]
    # The order matters: a label further down the list overrides the ones before it.
    labels_in_order = ["esm_unanswered", "day_finished", "esm_finished"]

    df_session_counts = (
        df_esm_preprocessed.groupby(session_keys)["id"]
        .count()
        .to_frame("esm_session_count")
    )

    # Evaluate every rule per row first; a session matches a rule if any of its rows does.
    row_flags = df_esm_preprocessed[session_keys].assign(
        esm_unanswered=df_esm_preprocessed["esm_status"] != 2,
        day_finished=df_esm_preprocessed["esm_user_answer"].isin(day_finished_answers),
        # Missing triggers count as not ending with "_last".
        esm_finished=df_esm_preprocessed["esm_trigger"].str.endswith("_last", na=False),
    )
    session_flags = row_flags.groupby(session_keys).any()

    # An object column can hold both the missing marker and the string labels.
    df_session_counts["session_response"] = np.full(
        len(df_session_counts), np.nan, dtype=object
    )
    for label in labels_in_order:
        # Both frames come from the same grouping, so their rows are in the same order.
        matches = session_flags[label].to_numpy(dtype=bool)
        df_session_counts.loc[matches, "session_response"] = label

    # TODO Look at evening-evening_last sequence, if everything is caught with finished sessions

    # TODO What can be done about morning EMA, perhaps morning-morning_first (sic!) sequence?

    # TODO What can be done about workday EMA.

    df_session_counts.loc[df_session_counts.session_response.isna(), "session_response"] = "esm_finished"
    # TODO But for now, simply take all other ESMs as answered.

    return df_session_counts
Add a method to transform ESM data and the test. 2021-05-27 18:10:34 +02:00			`import datetime`
			`from collections.abc import Collection`

[WIP] Prepare a function to classify adherence and illustrate steps in Jupyter Notebook. 2021-06-07 19:32:38 +02:00			`import numpy as np`
Add a method to transform ESM data and the test. 2021-05-27 18:10:34 +02:00			`import pandas as pd`
			`from pytz import timezone`

			`from config.models import ESM, Participant`
			`from setup import db_engine, session`

			`TZ_LJ = timezone("Europe/Ljubljana")`


			`def get_esm_data(usernames: Collection) -> pd.DataFrame:`
			`"""`
			`Read the data from the esm table and return it in a dataframe.`

			`Parameters`
			`----------`
			`usernames: Collection`
			`A list of usernames to put into the WHERE condition.`

			`Returns`
			`-------`
			`df_esm: pd.DataFrame`
Look at the ESM data and test JSON expansion. 2021-06-01 12:10:42 +02:00			`A dataframe of esm data.`
Add a method to transform ESM data and the test. 2021-05-27 18:10:34 +02:00			`"""`
			`query_esm = (`
			`session.query(ESM, Participant.username)`
			`.filter(Participant.id == ESM.participant_id)`
			`.filter(Participant.username.in_(usernames))`
			`)`
			`with db_engine.connect() as connection:`
			`df_esm = pd.read_sql(query_esm.statement, connection)`
			`return df_esm`


			`def preprocess_esm(df_esm: pd.DataFrame) -> pd.DataFrame:`
Document the preprocess_esm function. 2021-06-07 16:50:27 +02:00			`"""`
			`Convert timestamps into human-readable datetimes and expand the JSON column into several Pandas DF columns.`

			`Parameters`
			`----------`
			`df_esm: pd.DataFrame`
			`A dataframe of esm data.`

			`Returns`
			`-------`
			`df_esm_preprocessed: pd.DataFrame`
			`A dataframe with added columns: datetime in Ljubljana timezone and all fields from ESM_JSON column.`
			`"""`
Add a method to transform ESM data and the test. 2021-05-27 18:10:34 +02:00			`df_esm["datetime_lj"] = df_esm["double_esm_user_answer_timestamp"].apply(`
			`lambda x: datetime.datetime.fromtimestamp(x / 1000.0, tz=TZ_LJ)`
			`)`
Study session ID in depth. 2021-06-02 18:35:00 +02:00			`df_esm_json = pd.json_normalize(df_esm["esm_json"]).drop(`
			`columns=["esm_trigger"]`
			`) # The esm_trigger column is already present in the main df.`
Expand ESM_JSON column and add esm example data. 2021-06-01 17:57:08 +02:00			`return df_esm.join(df_esm_json)`
[WIP] Prepare a function to classify adherence and illustrate steps in Jupyter Notebook. 2021-06-07 19:32:38 +02:00

			`def classify_sessions_adherence(df_esm_preprocessed: pd.DataFrame) -> pd.DataFrame:`
			`"""`
			`For each distinct EMA session, determine how the participant responded to it.`
			`Possible outcomes are: esm_unanswered`

			`This is done in several steps.`
			`#TODO Finish the documentation.`

			`Parameters`
			`----------`
			`df_esm_preprocessed: pd.DataFrame`
			`A preprocessed dataframe of esm data, which must include the session ID (esm_session).`

			`Returns`
			`-------`
			`some dataframe`
			`"""`
			`sessions_grouped = df_esm_preprocessed.groupby(`
			`["participant_id", "device_id", "esm_session"]`
			`)`

			`df_session_counts = pd.DataFrame(sessions_grouped.count()["id"]).rename(`
			`columns={"id": "esm_session_count"}`
			`)`
			`df_session_counts["session_response"] = np.NaN`

			`esm_not_answered = sessions_grouped.apply(lambda x: (x.esm_status != 2).any())`
			`df_session_counts.loc[esm_not_answered, "session_response"] = "esm_unanswered"`

			`non_session = sessions_grouped.apply(`
			`lambda x: (`
Take the evening EMA time question into account as non-answered session. 2021-06-09 17:29:42 +02:00			`(x.esm_user_answer == "DayFinished3421") # I finished working for today.`
			`\| (x.esm_user_answer == "DayOff3421") # I am not going to work today.`
			`\| (x.esm_user_answer == "DayFinishedSetEvening") # When would you like to answer the evening EMA?`
[WIP] Prepare a function to classify adherence and illustrate steps in Jupyter Notebook. 2021-06-07 19:32:38 +02:00			`).any()`
			`)`
			`df_session_counts.loc[non_session, "session_response"] = "day_finished"`

			`finished_sessions = sessions_grouped.apply(`
			`lambda x: (x.esm_trigger.str.endswith("_last")).any()`
			`)`
			`df_session_counts.loc[finished_sessions, "session_response"] = "esm_finished"`

			`# TODO Look at evening-evening_last sequence, if everything is caught with finished sessions`

			`# TODO What can be done about morning EMA, perhaps morning-morning_first (sic!) sequence?`

			`# TODO What can be done about workday EMA.`

[WIP] Start calculating concordance. Note, workday and morning EMAs have not been properly dealt with, but assumed answered. 2021-06-08 16:07:39 +02:00			`df_session_counts.loc[df_session_counts.session_response.isna(), "session_response"] = "esm_finished"`
			`# TODO But for now, simply take all other ESMs as answered.`

			`return df_session_counts`