[WIP] Add a test for SensorFeatures.

Additional analysis for adherence.
Small corrections.
rapids
junos 2021-10-13 13:39:58 +02:00
parent b8c7606664
commit a63a7eac99
4 changed files with 48 additions and 4 deletions

@@ -12,7 +12,7 @@ dependencies:
   - mypy
   - nodejs
   - pandas
-  - psycopg2
+  - psycopg2 >= 2.9.1
   - python-dotenv
   - pytz
   - pyprojroot

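The environment now pins psycopg2 to 2.9.1 or newer. A minimal check, assuming the conda environment has been recreated and psycopg2 is importable there, is to print the resolved version:

import psycopg2

# environment.yml requires >= 2.9.1; __version__ reports the installed release (plus build flags).
print(psycopg2.__version__)
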
@@ -35,6 +35,7 @@ class Labels:
         participants_usernames = participants.query_db.get_usernames(
             collection_start=datetime.date.fromisoformat("2020-08-01")
         )
+        self.participants_label = "all"
         self.participants_usernames = participants_usernames
         self.df_esm = pd.DataFrame()

@@ -6,7 +6,7 @@
 #       extension: .py
 #       format_name: percent
 #       format_version: '1.3'
-#       jupytext_version: 1.11.4
+#       jupytext_version: 1.12.0
 #   kernelspec:
 #     display_name: straw2analysis
 #     language: python
@@ -96,13 +96,31 @@ df_session_counts_time = classify_sessions_by_completion_time(df_esm_preprocesse
 # Sessions are now classified according to the type of a session (a true questionnaire or simple single questions) and users response.
 
 # %%
-df_session_counts_time
+df_session_counts_time["session_response_cat"] = df_session_counts_time["session_response"].astype("category")
+df_session_counts_time["session_response_cat"] = df_session_counts_time["session_response_cat"].cat.remove_categories(['during_work_first', 'ema_unanswered', 'evening_first', 'morning', 'morning_first'])
+df_session_counts_time["session_response_cat"] = df_session_counts_time["session_response_cat"].cat.add_categories("interrupted")
+df_session_counts_time.loc[df_session_counts_time["session_response_cat"].isna(), "session_response_cat"] = "interrupted"
+#df_session_counts_time["session_response_cat"] = df_session_counts_time["session_response_cat"].cat.rename_categories({
+# "ema_unanswered": "interrupted",
+# "morning_first": "interrupted",
+# "evening_first": "interrupted",
+# "morning": "interrupted",
+# "during_work_first": "interrupted"})
+
+# %%
+df_session_counts_time.session_response_cat
 
 # %%
 tbl_session_outcomes = df_session_counts_time.reset_index()[
-    "session_response"
+    "session_response_cat"
 ].value_counts()
+
+# %%
+tbl_session_outcomes_relative = tbl_session_outcomes / len(df_session_counts_time)
+
+# %%
+print(tbl_session_outcomes_relative.to_latex(escape=True))
 
 # %%
 print("All sessions:", len(df_session_counts_time))
 print("-------------------------------------")

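The cells above collapse several session outcomes into a single "interrupted" category before tabulating. A self-contained sketch of the same pattern on a toy Series (the category names here are illustrative, not taken from the data):

import pandas as pd

# Toy stand-ins for session_response values.
toy = pd.Series(["finished", "ema_unanswered", "morning_first", "finished"], dtype="category")

# Removing categories turns their values into NaN ...
collapsed = toy.cat.remove_categories(["ema_unanswered", "morning_first"])
# ... then a single replacement category is added and used to fill the gaps.
collapsed = collapsed.cat.add_categories("interrupted").fillna("interrupted")

print(collapsed.value_counts())  # finished: 2, interrupted: 2

The commented-out rename_categories variant maps several old categories onto the same new name, which pandas rejects because the resulting categories must be unique; the remove/add/fill sequence sidesteps that restriction.
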
@@ -0,0 +1,25 @@
+import unittest
+
+import yaml
+from pyprojroot import here
+
+from machine_learning.features_sensor import *
+
+
+class SensorFeaturesTest(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls) -> None:
+        with open(here("machine_learning/config/minimal_features.yaml"), "r") as file:
+            cls.sensor_features_params = yaml.safe_load(file)
+
+    def test_yaml(self):
+        with open(here("machine_learning/config/minimal_features.yaml"), "r") as file:
+            sensor_features_params = yaml.safe_load(file)
+        self.assertIsInstance(sensor_features_params, dict)
+        self.assertIsInstance(sensor_features_params.get("grouping_variable"), str)
+        self.assertIsInstance(sensor_features_params.get("features"), dict)
+        self.assertIsInstance(sensor_features_params.get("participants_usernames"), list)
+
+    def test_participants_label(self):
+        sensor_features = SensorFeatures(**self.sensor_features_params)
+        self.assertRaises(ValueError, sensor_features.calculate_features)
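
To run just this test case with the standard library runner, one option (the import path is an assumption; adjust it to wherever the file lives in the repository) is:

import unittest

# Hypothetical module path for the new test file.
from test_sensor_features import SensorFeaturesTest

suite = unittest.defaultTestLoader.loadTestsFromTestCase(SensorFeaturesTest)
unittest.TextTestRunner(verbosity=2).run(suite)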