45 lines
1.3 KiB
Python
45 lines
1.3 KiB
Python
import datetime
|
|
from collections.abc import Collection
|
|
|
|
import pandas as pd
|
|
from pytz import timezone
|
|
|
|
from config.models import ESM, Participant
|
|
from setup import db_engine, session
|
|
|
|
# Time zone used by preprocess_esm to build the "datetime_lj" column.
TZ_LJ = timezone("Europe/Ljubljana")
|
|
|
|
|
|
def get_esm_data(usernames: Collection) -> pd.DataFrame:
    """
    Fetch ESM records of the selected participants as a dataframe.

    The query selects the ESM entity together with the participant's
    username, matching ESM rows to participants via the participant id.

    Parameters
    ----------
    usernames: Collection
        Usernames of the participants whose ESM rows should be returned
        (used in the WHERE ... IN condition).

    Returns
    -------
    df_esm: pd.DataFrame
        A dataframe of esm data, including the participant username.
    """
    # Join condition between the esm rows and their participant.
    belongs_to_participant = Participant.id == ESM.participant_id
    # Restrict the result to the requested usernames only.
    is_requested_user = Participant.username.in_(usernames)

    esm_with_username = (
        session.query(ESM, Participant.username)
        .filter(belongs_to_participant)
        .filter(is_requested_user)
    )

    with db_engine.connect() as connection:
        return pd.read_sql(esm_with_username.statement, connection)
|
|
|
|
|
|
def preprocess_esm(df_esm: pd.DataFrame) -> pd.DataFrame:
    """
    Add a localized answer time and expand the ESM JSON into columns.

    The input dataframe is left untouched; a new dataframe is returned.

    Parameters
    ----------
    df_esm: pd.DataFrame
        A dataframe of esm data. It must contain the columns
        "double_esm_user_answer_timestamp" (treated as milliseconds since
        the Unix epoch) and "esm_json".
        NOTE(review): assumes each "esm_json" entry is an already parsed
        dict-like record accepted by pd.json_normalize - confirm.

    Returns
    -------
    df_esm_preprocessed: pd.DataFrame
        A copy of the input with an additional "datetime_lj" column
        (timezone-aware, in TZ_LJ) and one column per field of "esm_json",
        except "esm_trigger".
    """
    # The timestamp is divided by 1000 because fromtimestamp expects seconds.
    datetime_lj = df_esm["double_esm_user_answer_timestamp"].apply(
        lambda x: datetime.datetime.fromtimestamp(x / 1000.0, tz=TZ_LJ)
    )

    # The esm_trigger column is already present in the main df.
    # errors="ignore" keeps empty input (no JSON columns at all) from
    # raising a KeyError.
    df_esm_json = pd.json_normalize(df_esm["esm_json"]).drop(
        columns=["esm_trigger"], errors="ignore"
    )
    # json_normalize yields one row per input row, in order; reuse the
    # original index so the join below aligns rows correctly even when
    # df_esm does not have a default 0..n-1 index.
    df_esm_json.index = df_esm.index

    # assign() returns a new frame, so the caller's dataframe is not mutated.
    return df_esm.assign(datetime_lj=datetime_lj).join(df_esm_json)
|