Add a method to transform ESM data and the test.

2021-05-27 18:10:34 +02:00 · 2021-05-27 18:10:34 +02:00 · 0199b4f0f2
parent eebb10504e
commit 0199b4f0f2
3 changed files with 67 additions and 0 deletions
--- a/config/environment.yml
+++ b/config/environment.yml
@ -14,6 +14,7 @@ dependencies:
  - pandas
  - psycopg2
  - python-dotenv
  - pytz
  - seaborn
  - sqlalchemy
  - tabulate
--- a/features/esm.py
+++ b/features/esm.py
@ -0,0 +1,46 @@
 import datetime
 from collections.abc import Collection
 import pandas as pd
 from pytz import timezone
 from config.models import ESM, Participant
 from setup import db_engine, session
 # Time zone (Europe/Ljubljana) in which ESM answer timestamps are expressed
 # after preprocessing.
 TZ_LJ = timezone("Europe/Ljubljana")
 def get_esm_data(usernames: Collection) -> pd.DataFrame:
    """
    Fetch the ESM rows of the given participants as a dataframe.

    Parameters
    ----------
    usernames: Collection
        Usernames used to restrict the query (SQL ``IN`` condition).

    Returns
    -------
    pd.DataFrame
        The columns of the esm table together with the participant username.
    """
    # Join condition between the two tables and the username restriction.
    belongs_to_participant = Participant.id == ESM.participant_id
    is_requested_user = Participant.username.in_(usernames)
    esm_select = (
        session.query(ESM, Participant.username)
        .filter(belongs_to_participant)
        .filter(is_requested_user)
        .statement
    )
    with db_engine.connect() as conn:
        return pd.read_sql(esm_select, conn)
 def preprocess_esm(df_esm: pd.DataFrame) -> pd.DataFrame:
    """
    Add a timezone-aware answer time column to the ESM data.

    The ``double_esm_user_answer_timestamp`` column (epoch milliseconds) is
    converted to datetimes in the ``TZ_LJ`` time zone and stored in a new
    ``datetime_lj`` column. The conversion is vectorised, so missing
    timestamps become ``NaT`` instead of raising, and an empty dataframe
    still receives a properly typed datetime column.

    Parameters
    ----------
    df_esm: pd.DataFrame
        ESM data containing a ``double_esm_user_answer_timestamp`` column.
        The dataframe is modified in place.

    Returns
    -------
    df_esm: pd.DataFrame
        The same dataframe with the additional ``datetime_lj`` column.
    """
    # Interpret the raw values as milliseconds since the Unix epoch (UTC) ...
    answered_utc = pd.to_datetime(
        df_esm["double_esm_user_answer_timestamp"], unit="ms", utc=True
    )
    # ... and express the same instants in local (Ljubljana) time.
    df_esm["datetime_lj"] = answered_utc.dt.tz_convert(TZ_LJ)
    # TODO: Deal with ESM_JSON: parse the esm_json column and extract
    # questionnaire_id, esm_type and esm_question (the "esm_instructions" key).
    return df_esm
--- a/test/test_esm.py
+++ b/test/test_esm.py
@ -0,0 +1,20 @@
 import unittest
 import numpy as np
 import pandas as pd
 from numpy.random import default_rng
 from pandas.testing import assert_series_equal
 from features.esm import preprocess_esm
 class EsmFeatures(unittest.TestCase):
    """Unit tests for the ESM preprocessing in ``features.esm``."""

    @classmethod
    def setUpClass(cls) -> None:
        # Two answer timestamps in epoch milliseconds, 2000 seconds apart:
        # 2021-05-27 15:04:20 UTC and 2021-05-27 15:37:40 UTC.
        cls.esm = pd.DataFrame(
            {"double_esm_user_answer_timestamp": [1622127860000, 1622129860000]}
        )

    def test_preprocess_esm(self):
        """The new ``datetime_lj`` column holds the right instants in local time."""
        # preprocess_esm modifies its argument, so keep the shared fixture intact.
        esm_processed = preprocess_esm(self.esm.copy())
        self.assertIn("datetime_lj", esm_processed.columns)
        self.assertEqual(len(esm_processed), len(self.esm))
        expected_utc = [
            pd.Timestamp("2021-05-27 15:04:20", tz="UTC"),
            pd.Timestamp("2021-05-27 15:37:40", tz="UTC"),
        ]
        for actual, expected in zip(esm_processed["datetime_lj"], expected_utc):
            # Aware datetimes compare by instant, regardless of time zone.
            self.assertEqual(actual, expected)
            # Ljubljana is on summer time (UTC+2) at the end of May.
            self.assertEqual(actual.utcoffset(), pd.Timedelta(hours=2))