import datetime
from collections.abc import Collection

import pandas as pd
from pytz import timezone

from config.models import ESM, Participant
from setup import db_engine, session

# Time zone into which the ESM answer timestamps are converted.
TZ_LJ = timezone("Europe/Ljubljana")


def get_esm_data(usernames: Collection) -> pd.DataFrame:
    """
    Read the data from the esm table and return it in a dataframe.

    Parameters
    ----------
    usernames: Collection
        A list of usernames to put into the WHERE condition.

    Returns
    -------
    df_esm: pd.DataFrame
        A dataframe of esm data.
    """
    query_esm = (
        session.query(ESM, Participant.username)
        .filter(Participant.id == ESM.participant_id)
        .filter(Participant.username.in_(usernames))
    )
    with db_engine.connect() as connection:
        df_esm = pd.read_sql(query_esm.statement, connection)
    return df_esm


def preprocess_esm(df_esm: pd.DataFrame) -> pd.DataFrame:
    """
    Add a localised datetime column and expand the esm_json column.

    The input dataframe is left unmodified; a new dataframe is returned.

    Parameters
    ----------
    df_esm: pd.DataFrame
        A dataframe of esm data. It must contain the columns
        double_esm_user_answer_timestamp (treated as milliseconds since
        the Unix epoch) and esm_json.
        NOTE(review): each esm_json entry is assumed to be an already
        parsed dict; JSON strings would need decoding first - confirm
        against the database driver.

    Returns
    -------
    df_esm_preprocessed: pd.DataFrame
        A copy of df_esm with an added datetime_lj column (timezone-aware,
        Europe/Ljubljana) and one column per field found in esm_json.
    """
    datetime_lj = df_esm["double_esm_user_answer_timestamp"].apply(
        lambda x: datetime.datetime.fromtimestamp(x / 1000.0, tz=TZ_LJ)
    )
    # The esm_trigger column is already present in the main df.
    # errors="ignore" keeps this working when the JSON has no such field
    # (e.g. for an empty dataframe).
    df_esm_json = pd.json_normalize(df_esm["esm_json"]).drop(
        columns=["esm_trigger"], errors="ignore"
    )
    # json_normalize may return a fresh 0..n-1 index, whereas join() aligns
    # on index labels. Re-attach the original index so that rows line up
    # even if df_esm was filtered, sorted or otherwise re-indexed.
    df_esm_json.index = df_esm.index
    # assign() returns a new dataframe, so the caller's df_esm is not mutated.
    return df_esm.assign(datetime_lj=datetime_lj).join(df_esm_json)