From 2e46f56111007d6debdfc17dbc261d7b4b6c6c9b Mon Sep 17 00:00:00 2001 From: JulioV Date: Mon, 15 Feb 2021 18:29:05 -0500 Subject: [PATCH] Empatica zips must be placed in pid folder and small fixes --- Snakefile | 25 +++++++++++----------- config.yaml | 28 +++++++++++-------------- rules/common.smk | 9 ++------ rules/preprocessing.smk | 2 +- src/features/empatica_tags/dbdp/main.py | 21 ------------------- 5 files changed, 28 insertions(+), 57 deletions(-) delete mode 100644 src/features/empatica_tags/dbdp/main.py diff --git a/Snakefile b/Snakefile index c502d64b..122b1421 100644 --- a/Snakefile +++ b/Snakefile @@ -367,18 +367,19 @@ for provider in config["EMPATICA_INTER_BEAT_INTERVAL"]["PROVIDERS"].keys(): files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"])) files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv") -for provider in config["EMPATICA_TAGS"]["PROVIDERS"].keys(): - if config["EMPATICA_TAGS"]["PROVIDERS"][provider]["COMPUTE"]: - for pid in config["PIDS"]: - suffixes = get_zip_suffixes(pid) - files_to_compute.extend(expand("data/raw/{pid}/empatica_tags_unzipped_{suffix}.csv", pid=pid, suffix=suffixes)) - files_to_compute.extend(expand("data/raw/{pid}/empatica_tags_raw_{suffix}.csv", pid=pid, suffix=suffixes)) - files_to_compute.extend(expand("data/raw/{pid}/empatica_tags_joined.csv", pid=config["PIDS"])) - files_to_compute.extend(expand("data/raw/{pid}/empatica_tags_with_datetime.csv", pid=config["PIDS"])) - files_to_compute.extend(expand("data/interim/{pid}/empatica_tags_features/empatica_tags_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["EMPATICA_TAGS"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower())) - files_to_compute.extend(expand("data/processed/features/{pid}/empatica_tags.csv", pid=config["PIDS"])) - files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", 
pid=config["PIDS"])) - files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv") +if isinstance(config["EMPATICA_TAGS"]["PROVIDERS"], dict): + for provider in config["EMPATICA_TAGS"]["PROVIDERS"].keys(): + if config["EMPATICA_TAGS"]["PROVIDERS"][provider]["COMPUTE"]: + for pid in config["PIDS"]: + suffixes = get_zip_suffixes(pid) + files_to_compute.extend(expand("data/raw/{pid}/empatica_tags_unzipped_{suffix}.csv", pid=pid, suffix=suffixes)) + files_to_compute.extend(expand("data/raw/{pid}/empatica_tags_raw_{suffix}.csv", pid=pid, suffix=suffixes)) + files_to_compute.extend(expand("data/raw/{pid}/empatica_tags_joined.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/raw/{pid}/empatica_tags_with_datetime.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/interim/{pid}/empatica_tags_features/empatica_tags_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["EMPATICA_TAGS"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower())) + files_to_compute.extend(expand("data/processed/features/{pid}/empatica_tags.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"])) + files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv") # Visualization for Data Exploration if config["HISTOGRAM_PHONE_DATA_YIELD"]["PLOT"]: diff --git a/config.yaml b/config.yaml index ed40edd9..59f88539 100644 --- a/config.yaml +++ b/config.yaml @@ -424,7 +424,7 @@ EMPATICA_DATA_CONFIGURATION: # See https://www.rapids.science/latest/features/fitbit-heartrate-summary/ EMPATICA_ACCELEROMETER: - TABLE: acc + TABLE: ACC PROVIDERS: DBDP: COMPUTE: False @@ -433,7 +433,7 @@ EMPATICA_ACCELEROMETER: SRC_LANGUAGE: "python" EMPATICA_HEARTRATE: - TABLE: hr + TABLE: HR PROVIDERS: DBDP: COMPUTE: False @@ -442,7 +442,7 @@ EMPATICA_HEARTRATE: SRC_LANGUAGE: "python" EMPATICA_TEMPERATURE: - TABLE: temp + 
TABLE: TEMP PROVIDERS: DBDP: COMPUTE: False @@ -451,40 +451,36 @@ EMPATICA_TEMPERATURE: SRC_LANGUAGE: "python" EMPATICA_ELECTRODERMAL_ACTIVITY: - TABLE: eda + TABLE: EDA PROVIDERS: DBDP: COMPUTE: False FEATURES: ["maxeda", "mineda", "avgeda", "medianeda", "modeeda", "stdeda", "diffmaxmodeeda", "diffminmodeeda", "entropyeda"] - SRC_FOLDER: "dbdp" # inside src/features/empatica_heartrate + SRC_FOLDER: "dbdp" # inside src/features/empatica_electrodermal_activity SRC_LANGUAGE: "python" EMPATICA_BLOOD_VOLUME_PULSE: - TABLE: bvp + TABLE: BVP PROVIDERS: DBDP: COMPUTE: False FEATURES: ["maxbvp", "minbvp", "avgbvp", "medianbvp", "modebvp", "stdbvp", "diffmaxmodebvp", "diffminmodebvp", "entropybvp"] - SRC_FOLDER: "dbdp" # inside src/features/empatica_heartrate + SRC_FOLDER: "dbdp" # inside src/features/empatica_blood_volume_pulse SRC_LANGUAGE: "python" EMPATICA_INTER_BEAT_INTERVAL: - TABLE: ibi + TABLE: IBI PROVIDERS: DBDP: COMPUTE: False FEATURES: ["maxibi", "minibi", "avgibi", "medianibi", "modeibi", "stdibi", "diffmaxmodeibi", "diffminmodeibi", "entropyibi"] - SRC_FOLDER: "dbdp" # inside src/features/empatica_heartrate + SRC_FOLDER: "dbdp" # inside src/features/empatica_inter_beat_interval SRC_LANGUAGE: "python" EMPATICA_TAGS: - TABLE: tags - PROVIDERS: - DBDP: - COMPUTE: False - FEATURES: [] - SRC_FOLDER: "dbdp" # inside src/features/empatica_heartrate - SRC_LANGUAGE: "python" + TABLE: TAGS + PROVIDERS: # None implemented yet + ######################################################################################################################## # PLOTS # diff --git a/rules/common.smk b/rules/common.smk index e6bfa030..91d0bf39 100644 --- a/rules/common.smk +++ b/rules/common.smk @@ -31,16 +31,11 @@ def get_phone_sensor_names(): return phone_sensor_names from pathlib import Path -import re - def get_zip_suffixes(pid): - zipfiles = list(Path("data/external/empatica").rglob(pid+"*.zip")) + zipfiles = list((Path("data/external/empatica/") / Path(pid)).rglob("*.zip")) suffixes = [] 
- pattern = re.compile("{}(.*)".format(pid)) for zipfile in zipfiles: - name = zipfile.stem - results = pattern.search(name) - suffixes.append(results.group(1)) + suffixes.append(zipfile.stem) return suffixes def get_all_raw_empatica_sensor_files(wildcards): diff --git a/rules/preprocessing.smk b/rules/preprocessing.smk index d62edb37..fcdea91a 100644 --- a/rules/preprocessing.smk +++ b/rules/preprocessing.smk @@ -246,7 +246,7 @@ rule fitbit_readable_datetime: from pathlib import Path rule unzip_empatica_data: input: - input_file = Path(config["EMPATICA_DATA_CONFIGURATION"]["SOURCE"]["FOLDER"]) / Path("{pid}{suffix}.zip"), + input_file = Path(config["EMPATICA_DATA_CONFIGURATION"]["SOURCE"]["FOLDER"]) / Path("{pid}") / Path("{suffix}.zip"), participant_file = "data/external/participant_files/{pid}.yaml" params: sensor = "{sensor}" diff --git a/src/features/empatica_tags/dbdp/main.py b/src/features/empatica_tags/dbdp/main.py deleted file mode 100644 index 82da2f3a..00000000 --- a/src/features/empatica_tags/dbdp/main.py +++ /dev/null @@ -1,21 +0,0 @@ -import pandas as pd -import numpy as np - -def dbdp_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs): - - sensor_data = pd.read_csv(sensor_data_files["sensor_data"]) - requested_features = provider["FEATURES"] - # name of the features this function can compute - base_features_names = [] # ["maxmagnitude", "minmagnitude", "avgmagnitude", "medianmagnitude", "stdmagnitude"] - # the subset of requested features this function can compute - features_to_compute = list(set(requested_features) & set(base_features_names)) - - features = pd.DataFrame(columns=["local_segment"] + features_to_compute) - if not sensor_data.empty: - sensor_data = filter_data_by_segment(sensor_data, time_segment) - - if not sensor_data.empty: - features = pd.DataFrame() - - - return features \ No newline at end of file