stress_at_work_analysis/features/esm.py

47 lines
1.4 KiB
Python

import datetime
from collections.abc import Collection
import pandas as pd
from pytz import timezone
from config.models import ESM, Participant
from setup import db_engine, session
# Time zone (Europe/Ljubljana) used by preprocess_esm when converting ESM
# answer timestamps into timezone-aware datetimes.
TZ_LJ = timezone("Europe/Ljubljana")
def get_esm_data(usernames: Collection) -> pd.DataFrame:
    """
    Read rows of the esm table for the given participants into a dataframe.

    The ESM table is matched to participants on the participant id and the
    result is restricted to participants whose username is in `usernames`.

    Parameters
    ----------
    usernames: Collection
        Usernames to put into the WHERE ... IN condition.

    Returns
    -------
    df_esm: pd.DataFrame
        A dataframe of ESM data, including the participant's username.
    """
    # Select ESM columns together with the username of the owning participant.
    esm_with_username = session.query(ESM, Participant.username)
    # Join condition between the two tables.
    esm_joined = esm_with_username.filter(Participant.id == ESM.participant_id)
    # Keep only the requested participants.
    esm_selected = esm_joined.filter(Participant.username.in_(usernames))

    # The connection is closed when the with-block exits, after the read.
    with db_engine.connect() as conn:
        return pd.read_sql(esm_selected.statement, conn)
def preprocess_esm(df_esm: pd.DataFrame) -> pd.DataFrame:
    """
    Add a timezone-aware answer time column to the ESM dataframe.

    The values in ``double_esm_user_answer_timestamp`` are treated as
    milliseconds since the epoch and converted, row by row, into datetimes
    in the ``TZ_LJ`` time zone. The result is stored in a new column
    ``datetime_lj``.

    Parameters
    ----------
    df_esm: pd.DataFrame
        ESM data containing the ``double_esm_user_answer_timestamp`` column.

    Returns
    -------
    df_esm: pd.DataFrame
        The same dataframe object (modified in place) with ``datetime_lj`` added.
    """

    def _to_local_datetime(timestamp_ms):
        # fromtimestamp expects seconds, the column holds milliseconds.
        seconds = timestamp_ms / 1000.0
        return datetime.datetime.fromtimestamp(seconds, tz=TZ_LJ)

    answer_timestamps = df_esm["double_esm_user_answer_timestamp"]
    df_esm["datetime_lj"] = answer_timestamps.apply(_to_local_datetime)

    # TODO: Deal with ESM_JSON, i.e. parse the esm_json column and extract
    # questionnaire_id, esm_type and esm_instructions (as esm_question),
    # using a missing value where a key is absent. Reference R snippet:
    #   esm_json_b = purrr::map(esm_json, jsonlite::fromJSON),
    #   questionnaire_id = map_int(esm_json_b, "questionnaire_id", .default=NA),
    #   esm_type = map_int(esm_json_b, "esm_type", .default=NA),
    #   esm_question = map_chr(esm_json_b, "esm_instructions", .default=NA)
    return df_esm