diff --git a/exploration/ex_ml_pipeline.py b/exploration/ex_ml_pipeline.py index 98993df..d6a9831 100644 --- a/exploration/ex_ml_pipeline.py +++ b/exploration/ex_ml_pipeline.py @@ -18,9 +18,9 @@ import datetime import os import sys -import yaml import seaborn as sns +import yaml from sklearn import linear_model from sklearn.model_selection import LeaveOneGroupOut, cross_val_score @@ -157,9 +157,12 @@ lin_reg_proximity.score( from machine_learning import pipeline # %% -with open('../machine_learning/config/minimal_features.yaml', 'r') as file: +with open("../machine_learning/config/minimal_features.yaml", "r") as file: sensor_features = yaml.full_load(file) +# %% +sensor_features.get_sensor_data("proximity") + # %% sensor_features.set_sensor_data() diff --git a/machine_learning/pipeline.py b/machine_learning/pipeline.py index e922088..f325e91 100644 --- a/machine_learning/pipeline.py +++ b/machine_learning/pipeline.py @@ -5,12 +5,12 @@ import yaml from sklearn.model_selection import cross_val_score import participants.query_db -from features import esm, helper, proximity +from features import communication, esm, helper, proximity from machine_learning import QUESTIONNAIRE_IDS, QUESTIONNAIRE_IDS_RENAME class SensorFeatures(yaml.YAMLObject): - yaml_tag = u'!SensorFeatures' + yaml_tag = u"!SensorFeatures" def __init__( self, @@ -34,6 +34,10 @@ class SensorFeatures(yaml.YAMLObject): self.df_proximity = pd.DataFrame() self.df_proximity_counts = pd.DataFrame() + self.df_calls = pd.DataFrame() + self.df_sms = pd.DataFrame() + self.df_calls_sms = pd.DataFrame() + def set_sensor_data(self): if "proximity" in self.data_types: self.df_proximity = proximity.get_proximity_data( @@ -41,6 +45,12 @@ class SensorFeatures(yaml.YAMLObject): ) self.df_proximity = helper.get_date_from_timestamp(self.df_proximity) self.df_proximity = proximity.recode_proximity(self.df_proximity) + if "communication" in self.data_types: + self.df_calls = communication.get_call_data(self.participants_usernames) + self.df_calls = helper.get_date_from_timestamp(self.df_calls) + + self.df_sms = communication.get_sms_data(self.participants_usernames) + self.df_sms = helper.get_date_from_timestamp(self.df_sms) def get_sensor_data(self, data_type) -> pd.DataFrame: if data_type == "proximity": @@ -53,6 +63,10 @@ class SensorFeatures(yaml.YAMLObject): self.df_proximity_counts = proximity.count_proximity( self.df_proximity, ["participant_id", self.grouping_variable] ) + if "communication" in self.data_types: + self.df_calls_sms = communication.calls_sms_features( + df_calls=self.df_calls, df_sms=self.df_sms + ) # TODO Think about joining dataframes. def get_features(self, data_type, feature_names) -> pd.DataFrame: