stress_at_work_analysis/features/esm.py

47 lines
1.4 KiB
Python
Raw Normal View History

import datetime
from collections.abc import Collection
import pandas as pd
from pytz import timezone
from config.models import ESM, Participant
from setup import db_engine, session
# Time zone (Europe/Ljubljana) used to localize ESM answer timestamps.
TZ_LJ = timezone("Europe/Ljubljana")
def get_esm_data(usernames: Collection) -> pd.DataFrame:
    """
    Load the ESM records of the selected participants into a dataframe.

    Parameters
    ----------
    usernames: Collection
        Usernames of the participants whose ESM rows should be selected.

    Returns
    -------
    df_esm: pd.DataFrame
        The rows of the esm table together with the matching participant's
        username.
    """
    # Join condition between the two tables and the username restriction.
    belongs_to_participant = Participant.id == ESM.participant_id
    is_requested_user = Participant.username.in_(usernames)

    esm_query = (
        session.query(ESM, Participant.username)
        .filter(belongs_to_participant)
        .filter(is_requested_user)
    )

    with db_engine.connect() as conn:
        return pd.read_sql(esm_query.statement, conn)
def preprocess_esm(df_esm: pd.DataFrame) -> pd.DataFrame:
    """
    Add a timezone-aware answer time column to the ESM data.

    The values of ``double_esm_user_answer_timestamp`` (Unix timestamps in
    milliseconds) are converted to datetimes in the ``TZ_LJ`` time zone and
    stored in a new ``datetime_lj`` column. Missing timestamps become ``NaT``.

    Parameters
    ----------
    df_esm: pd.DataFrame
        ESM data with a ``double_esm_user_answer_timestamp`` column.
        The dataframe is modified in place.

    Returns
    -------
    df_esm: pd.DataFrame
        The same dataframe with the added ``datetime_lj`` column.
    """
    timestamps_ms = df_esm["double_esm_user_answer_timestamp"]

    # fromtimestamp() expects seconds and raises on NaN/None, so missing
    # values are mapped to NaT explicitly.
    answer_times = [
        pd.NaT
        if pd.isna(ts_ms)
        else datetime.datetime.fromtimestamp(ts_ms / 1000.0, tz=TZ_LJ)
        for ts_ms in timestamps_ms
    ]

    # An explicit dtype keeps the column datetime-typed even when the input
    # is empty or contains only missing values (Series.apply on an empty
    # column would just hand back the source dtype).
    df_esm["datetime_lj"] = pd.Series(
        answer_times,
        index=timestamps_ms.index,
        dtype=pd.DatetimeTZDtype(tz=TZ_LJ),
    )

    # TODO: Deal with ESM_JSON. Reference implementation in R:
    #   esm_json_b = purrr::map(esm_json, jsonlite::fromJSON),
    #   questionnaire_id = map_int(esm_json_b, "questionnaire_id", .default=NA),
    #   esm_type = map_int(esm_json_b, "esm_type", .default=NA),
    #   esm_question = map_chr(esm_json_b, "esm_instructions", .default=NA)
    return df_esm