Add a method to transform ESM data, and a test for it.

communication
junos 2021-05-27 18:10:34 +02:00
parent eebb10504e
commit 0199b4f0f2
3 changed files with 67 additions and 0 deletions

View File

@ -14,6 +14,7 @@ dependencies:
- pandas
- psycopg2
- python-dotenv
- pytz
- seaborn
- sqlalchemy
- tabulate

46
features/esm.py 100644
View File

@ -0,0 +1,46 @@
import datetime
from collections.abc import Collection
import pandas as pd
from pytz import timezone
from config.models import ESM, Participant
from setup import db_engine, session
# Time zone (Europe/Ljubljana) that preprocess_esm() expresses ESM answer timestamps in.
TZ_LJ = timezone("Europe/Ljubljana")
def get_esm_data(usernames: Collection) -> pd.DataFrame:
    """
    Load the ESM records of the selected participants into a dataframe.

    Parameters
    ----------
    usernames: Collection
        Usernames of the participants whose ESM rows should be selected
        (used in the WHERE ... IN condition).

    Returns
    -------
    df_esm: pd.DataFrame
        A dataframe of ESM data, together with the participant username.
    """
    # Start from the ESM rows plus the username column of the participant table ...
    esm_with_username = session.query(ESM, Participant.username)
    # ... match every ESM row with its participant ...
    esm_with_username = esm_with_username.filter(
        Participant.id == ESM.participant_id
    )
    # ... and keep only the requested participants.
    esm_with_username = esm_with_username.filter(
        Participant.username.in_(usernames)
    )

    with db_engine.connect() as conn:
        return pd.read_sql(esm_with_username.statement, conn)
def preprocess_esm(df_esm: pd.DataFrame) -> pd.DataFrame:
    """
    Add a time zone aware datetime column to the ESM data.

    The answer timestamps, given in milliseconds since the Unix epoch, are
    converted to datetimes in the TZ_LJ time zone and stored in the new
    column "datetime_lj". Missing timestamps become NaT instead of raising.

    Parameters
    ----------
    df_esm: pd.DataFrame
        ESM data containing a "double_esm_user_answer_timestamp" column.
        Note that the column is added to this dataframe in place.

    Returns
    -------
    df_esm: pd.DataFrame
        The same dataframe, with the "datetime_lj" column added.
    """
    # Vectorised conversion: interpret the values as UTC instants first and
    # only then shift them to local time, so that DST is handled by the
    # time zone database rather than by per-row Python datetime calls.
    df_esm["datetime_lj"] = pd.to_datetime(
        df_esm["double_esm_user_answer_timestamp"], unit="ms", utc=True
    ).dt.tz_convert(TZ_LJ)
    # TODO: Deal with ESM_JSON. Reference implementation (R) to be ported:
    # (esm_json_b = purrr::map(esm_json, jsonlite::fromJSON),
    #  questionnaire_id = map_int(esm_json_b, "questionnaire_id", .default=NA),
    #  esm_type = map_int(esm_json_b, "esm_type", .default=NA),
    #  esm_question = map_chr(esm_json_b, "esm_instructions", .default=NA))
    return df_esm

20
test/test_esm.py 100644
View File

@ -0,0 +1,20 @@
import unittest
import numpy as np
import pandas as pd
from numpy.random import default_rng
from pandas.testing import assert_series_equal
from features.esm import preprocess_esm
class EsmFeatures(unittest.TestCase):
    """Tests for the ESM preprocessing in features.esm."""

    def setUp(self) -> None:
        # preprocess_esm adds its column to the frame it is given, so build a
        # fresh fixture for every test instead of sharing one across the class.
        self.esm = pd.DataFrame(
            {"double_esm_user_answer_timestamp": [1622127860000, 1622129860000]}
        )

    def test_preprocess_esm(self):
        """Millisecond timestamps are converted to Europe/Ljubljana datetimes."""
        esm_processed = preprocess_esm(self.esm)

        self.assertIn("datetime_lj", esm_processed.columns)
        self.assertEqual(len(esm_processed), 2)
        # 1622127860 s is 2021-05-27 15:04:20 UTC; Ljubljana is on CEST
        # (UTC+02:00) on that date, and the second value is 2000 s later.
        self.assertEqual(
            [ts.isoformat() for ts in esm_processed["datetime_lj"]],
            ["2021-05-27T17:04:20+02:00", "2021-05-27T17:37:40+02:00"],
        )