diff --git a/config/environment.yml b/config/environment.yml index d0a0923..62cb210 100644 --- a/config/environment.yml +++ b/config/environment.yml @@ -12,7 +12,7 @@ dependencies: - mypy - nodejs - pandas - - psycopg2 + - psycopg2 >= 2.9.1 - python-dotenv - pytz - pyprojroot diff --git a/machine_learning/labels.py b/machine_learning/labels.py index 6a59c65..6e17c3e 100644 --- a/machine_learning/labels.py +++ b/machine_learning/labels.py @@ -35,6 +35,7 @@ class Labels: participants_usernames = participants.query_db.get_usernames( collection_start=datetime.date.fromisoformat("2020-08-01") ) + self.participants_label = "all" self.participants_usernames = participants_usernames self.df_esm = pd.DataFrame() diff --git a/statistical_analysis/adherence.py b/statistical_analysis/adherence.py index 5dc2e4a..2477355 100644 --- a/statistical_analysis/adherence.py +++ b/statistical_analysis/adherence.py @@ -6,7 +6,7 @@ # extension: .py # format_name: percent # format_version: '1.3' -# jupytext_version: 1.11.4 +# jupytext_version: 1.12.0 # kernelspec: # display_name: straw2analysis # language: python @@ -96,13 +96,31 @@ df_session_counts_time = classify_sessions_by_completion_time(df_esm_preprocesse # Sessions are now classified according to the type of a session (a true questionnaire or simple single questions) and users response. # %% -df_session_counts_time +df_session_counts_time["session_response_cat"] = df_session_counts_time["session_response"].astype("category") +df_session_counts_time["session_response_cat"] = df_session_counts_time["session_response_cat"].cat.remove_categories(['during_work_first', 'ema_unanswered', 'evening_first', 'morning', 'morning_first']) +df_session_counts_time["session_response_cat"] = df_session_counts_time["session_response_cat"].cat.add_categories("interrupted") +df_session_counts_time.loc[df_session_counts_time["session_response_cat"].isna(), "session_response_cat"] = "interrupted" +#df_session_counts_time["session_response_cat"] = df_session_counts_time["session_response_cat"].cat.rename_categories({ +# "ema_unanswered": "interrupted", +# "morning_first": "interrupted", +# "evening_first": "interrupted", +# "morning": "interrupted", +# "during_work_first": "interrupted"}) + +# %% +df_session_counts_time.session_response_cat # %% tbl_session_outcomes = df_session_counts_time.reset_index()[ - "session_response" + "session_response_cat" ].value_counts() +# %% +tbl_session_outcomes_relative = tbl_session_outcomes / len(df_session_counts_time) + +# %% +print(tbl_session_outcomes_relative.to_latex(escape=True)) + # %% print("All sessions:", len(df_session_counts_time)) print("-------------------------------------") diff --git a/test/test_features_sensor.py b/test/test_features_sensor.py new file mode 100644 index 0000000..3a9a215 --- /dev/null +++ b/test/test_features_sensor.py @@ -0,0 +1,25 @@ +import unittest + +import yaml +from pyprojroot import here + +from machine_learning.features_sensor import * + + +class SensorFeaturesTest(unittest.TestCase): + @classmethod + def setUpClass(cls) -> None: + with open(here("machine_learning/config/minimal_features.yaml"), "r") as file: + cls.sensor_features_params = yaml.safe_load(file) + + def test_yaml(self): + with open(here("machine_learning/config/minimal_features.yaml"), "r") as file: + sensor_features_params = yaml.safe_load(file) + self.assertIsInstance(sensor_features_params, dict) + self.assertIsInstance(sensor_features_params.get("grouping_variable"), str) + self.assertIsInstance(sensor_features_params.get("features"), dict) + self.assertIsInstance(sensor_features_params.get("participants_usernames"), list) + + def test_participants_label(self): + sensor_features = SensorFeatures(**self.sensor_features_params) + self.assertRaises(ValueError, sensor_features.calculate_features) \ No newline at end of file