diff --git a/README.md b/README.md index c7ad8eb..ff7f95f 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ To install: ipython kernel install --user --name=straw2analysis ``` -2. Provide an .env file to be used by `python-dotenv` which should be placed in the top folder of the application +2. Provide a file called `.env` to be used by `python-dotenv` which should be placed in the top folder of the application and should have the form: ``` diff --git a/config/environment.yml b/config/environment.yml index e1ccedf..a64d1e1 100644 --- a/config/environment.yml +++ b/config/environment.yml @@ -15,6 +15,7 @@ dependencies: - psycopg2 - python-dotenv - pytz + - pyyaml - seaborn - scikit-learn - sqlalchemy diff --git a/exploration/ex_ml_pipeline.py b/exploration/ex_ml_pipeline.py index c57bea3..98993df 100644 --- a/exploration/ex_ml_pipeline.py +++ b/exploration/ex_ml_pipeline.py @@ -18,6 +18,7 @@ import datetime import os import sys +import yaml import seaborn as sns from sklearn import linear_model @@ -156,20 +157,17 @@ lin_reg_proximity.score( from machine_learning import pipeline # %% -ml_pipeline = pipeline.MachineLearningPipeline( - labels_questionnaire="PANAS", data_types="proximity" -) +with open('../machine_learning/config/minimal_features.yaml', 'r') as file: + sensor_features = yaml.full_load(file) # %% -ml_pipeline.get_labels() - -# %% tags=[] -ml_pipeline.get_sensor_data() +sensor_features.set_sensor_data() # %% -ml_pipeline.aggregate_daily() +sensor_features.get_sensor_data("proximity") # %% -ml_pipeline.df_full_data_daily_means +sensor_features.calculate_features() # %% +sensor_features.get_features("proximity", "all") diff --git a/machine_learning/config/minimal_features.yaml b/machine_learning/config/minimal_features.yaml new file mode 100644 index 0000000..c54f47e --- /dev/null +++ b/machine_learning/config/minimal_features.yaml @@ -0,0 +1,5 @@ +--- !SensorFeatures +grouping_variable: date_lj +data_types: [proximity] +feature_names: all +participants_usernames: [nokia_0000003] diff --git a/machine_learning/pipeline.py b/machine_learning/pipeline.py index 223131c..e922088 100644 --- a/machine_learning/pipeline.py +++ b/machine_learning/pipeline.py @@ -1,6 +1,7 @@ import datetime import pandas as pd +import yaml from sklearn.model_selection import cross_val_score import participants.query_db @@ -8,7 +9,9 @@ from features import esm, helper, proximity from machine_learning import QUESTIONNAIRE_IDS, QUESTIONNAIRE_IDS_RENAME -class SensorFeatures: +class SensorFeatures(yaml.YAMLObject): + yaml_tag = u'!SensorFeatures' + def __init__( self, grouping_variable,