# PHONE_SPEECH (custom sensor): register the per-participant files that every
# enabled provider needs, from the raw dump through to the processed features.
# NOTE(review): rules/features.smk reads
# "data/interim/{pid}/phone_speech_with_datetime.csv" while the target requested
# here lives under "data/raw/" -- confirm which location is intended.
for provider, provider_settings in config["PHONE_SPEECH"]["PROVIDERS"].items():
    if not provider_settings["COMPUTE"]:
        continue

    # Inputs shared by all providers of this sensor.
    for target_pattern in (
        "data/raw/{pid}/phone_speech_raw.csv",
        "data/raw/{pid}/phone_speech_with_datetime.csv",
        "data/interim/{pid}/phone_speech_clean.csv",
    ):
        files_to_compute.extend(expand(target_pattern, pid=config["PIDS"]))

    # Provider-specific feature file; the script language is taken from SRC_SCRIPT.
    files_to_compute.extend(
        expand(
            "data/interim/{pid}/phone_speech_features/phone_speech_{language}_{provider_key}.csv",
            pid=config["PIDS"],
            language=get_script_language(provider_settings["SRC_SCRIPT"]),
            provider_key=provider.lower(),
        )
    )

    # Merged sensor-level features for each participant.
    files_to_compute.extend(expand("data/processed/features/{pid}/phone_speech.csv", pid=config["PIDS"]))
# Custom sensor
# Speech sensor settings: the source container plus one feature provider.
PHONE_SPEECH:
  CONTAINER: speech
  PROVIDERS:
    # NOTE(review): SRC_SCRIPT below defines `straw_features`, but this provider
    # key is RAPIDS -- presumably the feature entry point resolves the function
    # from the provider key; confirm the key should not be STRAW.
    RAPIDS:
      COMPUTE: True
      # NOTE(review): "countscans" is not one of the feature names listed in
      # src/features/phone_speech/straw/main.py, and that script also reads a
      # SCALES key that is not set here -- TODO confirm the intended values.
      FEATURES: ["countscans"]
      SRC_SCRIPT: src/features/phone_speech/straw/main.py
import pandas as pd

# Maps a questionnaire (scale) name to the numeric `questionnaire_id` used in
# the sensor data.
# NOTE(review): these names are questionnaire scales, not speech features; the
# table appears to be carried over from the ESM provider.
# TODO: adjust features files
QUESTIONNAIRE_IDS = {
    "sleep_quality": 1,
    "PANAS_positive_affect": 8,
    "PANAS_negative_affect": 9,
    "JCQ_job_demand": 10,
    "JCQ_job_control": 11,
    "JCQ_supervisor_support": 12,
    "JCQ_coworker_support": 13,
    "PFITS_supervisor": 14,
    "PFITS_coworkers": 15,
    "UWES_vigor": 16,
    "UWES_dedication": 17,
    "UWES_absorption": 18,
    "COPE_active": 19,
    "COPE_support": 20,
    "COPE_emotions": 21,
    "balance_life_work": 22,
    "balance_work_life": 23,
    "recovery_experience_detachment": 24,
    "recovery_experience_relaxation": 25,
    "symptoms": 26,
    "appraisal_stressfulness_event": 87,
    "appraisal_threat": 88,
    "appraisal_challenge": 89,
    "appraisal_event_time": 90,
    "appraisal_event_duration": 91,
    "appraisal_event_work_related": 92,
    "appraisal_stressfulness_period": 93,
    "late_work": 94,
    "work_hours": 95,
    "left_work": 96,
    "activities": 97,
    "coffee_breaks": 98,
    "at_work_yet": 99,
}


def straw_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs):
    """Compute the per-segment mean score of every requested scale.

    Args:
        sensor_data_files: mapping whose "sensor_data" entry is the path of the
            CSV file to read.
        time_segment: time segment handed unchanged to `filter_data_by_segment`.
        provider: provider configuration. "FEATURES" is required; "SCALES" is
            optional and lists the `QUESTIONNAIRE_IDS` keys to aggregate.
        filter_data_by_segment: callable `(data, time_segment) -> DataFrame`;
            its result is expected to carry a "local_segment" column.
        *args, **kwargs: accepted for interface compatibility and ignored.

    Returns:
        A DataFrame with a "local_segment" column and one "<scale>_mean" column
        per requested scale. When there is no data (before or after segment
        filtering) an empty frame is returned whose columns are
        "local_segment" plus the requested features that appear in the known
        base feature names.

    Raises:
        KeyError: if a requested scale is not a key of `QUESTIONNAIRE_IDS`.
    """
    esm_data = pd.read_csv(sensor_data_files["sensor_data"])
    requested_features = provider["FEATURES"]
    # SCALES is optional: a provider configured without it requests no scales
    # instead of failing with a KeyError.
    requested_scales = provider.get("SCALES") or []
    # name of the features this function can compute
    base_features_names = [
        "PANAS_positive_affect", "PANAS_negative_affect", "JCQ_job_demand", "JCQ_job_control",
        "JCQ_supervisor_support", "JCQ_coworker_support", "appraisal_stressfulness_period",
        "appraisal_stressfulness_event", "appraisal_threat", "appraisal_challenge",
    ]
    # TODO Check valid questionnaire and feature names.
    # the subset of requested features this function can compute, de-duplicated
    # and kept in the requested order so the column layout is deterministic
    features_to_compute = list(
        dict.fromkeys(name for name in requested_features if name in base_features_names)
    )
    empty_features = pd.DataFrame(columns=["local_segment"] + features_to_compute)

    if esm_data.empty:
        return empty_features
    esm_data = filter_data_by_segment(esm_data, time_segment)
    if esm_data.empty:
        return empty_features

    # Collect one Series per scale and build the frame in a single step: the
    # DataFrame constructor takes the union of the Series indexes, so a segment
    # that only has answers for some scales is kept (with NaN elsewhere).
    scale_means = {}
    for scale in requested_scales:
        mask = esm_data["questionnaire_id"] == QUESTIONNAIRE_IDS[scale]
        scale_means[scale + "_mean"] = (
            esm_data.loc[mask].groupby(["local_segment"])["esm_user_score"].mean()
        )
    # TODO Create the column esm_user_score in esm_clean. Currently, this is only done when reversing.

    esm_features = pd.DataFrame(scale_means)
    # Name the index explicitly so reset_index always yields "local_segment",
    # including when no scale was requested.
    return esm_features.rename_axis("local_segment").reset_index()