Add communication features to pipeline.

rapids
junos 2021-08-19 17:05:44 +02:00
parent 0ed34e97b3
commit 429aa43bd1
2 changed files with 21 additions and 4 deletions

View File

@@ -18,9 +18,9 @@
import datetime import datetime
import os import os
import sys import sys
import yaml
import seaborn as sns import seaborn as sns
import yaml
from sklearn import linear_model from sklearn import linear_model
from sklearn.model_selection import LeaveOneGroupOut, cross_val_score from sklearn.model_selection import LeaveOneGroupOut, cross_val_score
@@ -157,9 +157,12 @@ lin_reg_proximity.score(
from machine_learning import pipeline from machine_learning import pipeline
# %% # %%
with open('../machine_learning/config/minimal_features.yaml', 'r') as file: with open("../machine_learning/config/minimal_features.yaml", "r") as file:
sensor_features = yaml.full_load(file) sensor_features = yaml.full_load(file)
# %%
sensor_features.get_sensor_data("proximity")
# %% # %%
sensor_features.set_sensor_data() sensor_features.set_sensor_data()

View File

@@ -5,12 +5,12 @@ import yaml
from sklearn.model_selection import cross_val_score from sklearn.model_selection import cross_val_score
import participants.query_db import participants.query_db
from features import esm, helper, proximity from features import communication, esm, helper, proximity
from machine_learning import QUESTIONNAIRE_IDS, QUESTIONNAIRE_IDS_RENAME from machine_learning import QUESTIONNAIRE_IDS, QUESTIONNAIRE_IDS_RENAME
class SensorFeatures(yaml.YAMLObject): class SensorFeatures(yaml.YAMLObject):
yaml_tag = u'!SensorFeatures' yaml_tag = u"!SensorFeatures"
def __init__( def __init__(
self, self,
@@ -34,6 +34,10 @@ class SensorFeatures(yaml.YAMLObject):
self.df_proximity = pd.DataFrame() self.df_proximity = pd.DataFrame()
self.df_proximity_counts = pd.DataFrame() self.df_proximity_counts = pd.DataFrame()
self.df_calls = pd.DataFrame()
self.df_sms = pd.DataFrame()
self.df_calls_sms = pd.DataFrame()
def set_sensor_data(self): def set_sensor_data(self):
if "proximity" in self.data_types: if "proximity" in self.data_types:
self.df_proximity = proximity.get_proximity_data( self.df_proximity = proximity.get_proximity_data(
@@ -41,6 +45,12 @@ class SensorFeatures(yaml.YAMLObject):
) )
self.df_proximity = helper.get_date_from_timestamp(self.df_proximity) self.df_proximity = helper.get_date_from_timestamp(self.df_proximity)
self.df_proximity = proximity.recode_proximity(self.df_proximity) self.df_proximity = proximity.recode_proximity(self.df_proximity)
if "communication" in self.data_types:
self.df_calls = communication.get_call_data(self.participants_usernames)
self.df_calls = helper.get_date_from_timestamp(self.df_calls)
self.df_sms = communication.get_sms_data(self.participants_usernames)
self.df_sms = helper.get_date_from_timestamp(self.df_sms)
def get_sensor_data(self, data_type) -> pd.DataFrame: def get_sensor_data(self, data_type) -> pd.DataFrame:
if data_type == "proximity": if data_type == "proximity":
@@ -53,6 +63,10 @@ class SensorFeatures(yaml.YAMLObject):
self.df_proximity_counts = proximity.count_proximity( self.df_proximity_counts = proximity.count_proximity(
self.df_proximity, ["participant_id", self.grouping_variable] self.df_proximity, ["participant_id", self.grouping_variable]
) )
if "communication" in self.data_types:
self.df_calls_sms = communication.calls_sms_features(
df_calls=self.df_calls, df_sms=self.df_sms
)
# TODO Think about joining dataframes. # TODO Think about joining dataframes.
def get_features(self, data_type, feature_names) -> pd.DataFrame: def get_features(self, data_type, feature_names) -> pd.DataFrame: