stress_at_work_analysis/features/esm.py

45 lines
1.3 KiB
Python

import datetime
from collections.abc import Collection
import pandas as pd
from pytz import timezone
from config.models import ESM, Participant
from setup import db_engine, session
TZ_LJ = timezone("Europe/Ljubljana")
def get_esm_data(usernames: Collection) -> pd.DataFrame:
"""
Read the data from the esm table and return it in a dataframe.
Parameters
----------
usernames: Collection
A list of usernames to put into the WHERE condition.
Returns
-------
df_esm: pd.DataFrame
A dataframe of esm data.
"""
query_esm = (
session.query(ESM, Participant.username)
.filter(Participant.id == ESM.participant_id)
.filter(Participant.username.in_(usernames))
)
with db_engine.connect() as connection:
df_esm = pd.read_sql(query_esm.statement, connection)
return df_esm
def preprocess_esm(df_esm: pd.DataFrame) -> pd.DataFrame:
df_esm["datetime_lj"] = df_esm["double_esm_user_answer_timestamp"].apply(
lambda x: datetime.datetime.fromtimestamp(x / 1000.0, tz=TZ_LJ)
)
df_esm_json = pd.json_normalize(df_esm["esm_json"]).drop(
columns=["esm_trigger"]
) # The esm_trigger column is already present in the main df.
return df_esm.join(df_esm_json)