import datetime
from collections.abc import Collection

import pandas as pd
from pytz import timezone

from config.models import ESM, Participant
from setup import db_engine, session

# Time zone used for the localized ``datetime_lj`` column.
# pytz is declared as a dependency in config/environment.yml.
TZ_LJ = timezone("Europe/Ljubljana")


def get_esm_data(usernames: Collection) -> pd.DataFrame:
    """
    Read the data from the esm table and return it in a dataframe.

    Parameters
    ----------
    usernames: Collection
        A list of usernames to put into the WHERE condition.

    Returns
    -------
    df_esm: pd.DataFrame
        A dataframe of ESM data, with the participant's username
        selected alongside the ESM columns.
    """
    # Join ESM rows to their participant and keep only the requested usernames.
    query_esm = (
        session.query(ESM, Participant.username)
        .filter(Participant.id == ESM.participant_id)
        .filter(Participant.username.in_(usernames))
    )
    with db_engine.connect() as connection:
        df_esm = pd.read_sql(query_esm.statement, connection)
    return df_esm


def preprocess_esm(df_esm: pd.DataFrame) -> pd.DataFrame:
    """
    Add a time-zone-aware answer datetime column to the ESM dataframe.

    The column ``double_esm_user_answer_timestamp`` is interpreted as a Unix
    timestamp in milliseconds and converted to the ``TZ_LJ`` time zone.

    Parameters
    ----------
    df_esm: pd.DataFrame
        A dataframe containing the column ``double_esm_user_answer_timestamp``.
        It is modified in place: the column ``datetime_lj`` is added to it.

    Returns
    -------
    df_esm: pd.DataFrame
        The same dataframe object, with the additional ``datetime_lj`` column.
        Missing timestamps result in ``NaT`` instead of raising an error.
    """
    # Vectorized conversion: parse as UTC instants first, then shift the
    # representation to the local zone. Unlike a per-row
    # datetime.fromtimestamp call, this tolerates missing (NaN) timestamps.
    answered_utc = pd.to_datetime(
        df_esm["double_esm_user_answer_timestamp"], unit="ms", utc=True
    )
    df_esm["datetime_lj"] = answered_utc.dt.tz_convert(TZ_LJ)
    # TODO: Deal with the esm_json column. The R implementation being ported
    # parses each JSON string and extracts:
    #   questionnaire_id <- "questionnaire_id" (integer, NA if absent)
    #   esm_type         <- "esm_type"         (integer, NA if absent)
    #   esm_question     <- "esm_instructions" (string,  NA if absent)
    return df_esm
import unittest

import numpy as np
import pandas as pd
from numpy.random import default_rng
from pandas.testing import assert_series_equal

from features.esm import preprocess_esm


class EsmFeatures(unittest.TestCase):
    """Unit tests for the ESM preprocessing helpers."""

    @classmethod
    def setUpClass(cls) -> None:
        # Two answer timestamps in milliseconds since the Unix epoch:
        # 2021-05-27 15:04:20 UTC and 2021-05-27 15:37:40 UTC.
        cls.esm = pd.DataFrame(
            {"double_esm_user_answer_timestamp": [1622127860000, 1622129860000]}
        )

    def test_preprocess_esm(self):
        """The answer timestamps are converted to Ljubljana local time."""
        # preprocess_esm adds its column to the frame it receives, so hand it
        # a copy to keep the class-level fixture pristine for other tests.
        esm_processed = preprocess_esm(self.esm.copy())

        self.assertIn("double_esm_user_answer_timestamp", esm_processed.columns)
        self.assertIn("datetime_lj", esm_processed.columns)
        # Late May is in daylight saving time, so the offset is +02:00.
        observed = [stamp.isoformat() for stamp in esm_processed["datetime_lj"]]
        self.assertEqual(
            observed,
            ["2021-05-27T17:04:20+02:00", "2021-05-27T17:37:40+02:00"],
        )