Empatica zips must be placed in pid folder and small fixes

feature/plugin_sentimental
JulioV 2021-02-15 18:29:05 -05:00
parent a26a44819a
commit 2e46f56111
5 changed files with 28 additions and 57 deletions

View File

@ -367,18 +367,19 @@ for provider in config["EMPATICA_INTER_BEAT_INTERVAL"]["PROVIDERS"].keys():
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv") files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
for provider in config["EMPATICA_TAGS"]["PROVIDERS"].keys(): if isinstance(config["EMPATICA_TAGS"]["PROVIDERS"], dict):
if config["EMPATICA_TAGS"]["PROVIDERS"][provider]["COMPUTE"]: for provider in config["EMPATICA_TAGS"]["PROVIDERS"].keys():
for pid in config["PIDS"]: if config["EMPATICA_TAGS"]["PROVIDERS"][provider]["COMPUTE"]:
suffixes = get_zip_suffixes(pid) for pid in config["PIDS"]:
files_to_compute.extend(expand("data/raw/{pid}/empatica_tags_unzipped_{suffix}.csv", pid=pid, suffix=suffixes)) suffixes = get_zip_suffixes(pid)
files_to_compute.extend(expand("data/raw/{pid}/empatica_tags_raw_{suffix}.csv", pid=pid, suffix=suffixes)) files_to_compute.extend(expand("data/raw/{pid}/empatica_tags_unzipped_{suffix}.csv", pid=pid, suffix=suffixes))
files_to_compute.extend(expand("data/raw/{pid}/empatica_tags_joined.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/raw/{pid}/empatica_tags_raw_{suffix}.csv", pid=pid, suffix=suffixes))
files_to_compute.extend(expand("data/raw/{pid}/empatica_tags_with_datetime.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/raw/{pid}/empatica_tags_joined.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/interim/{pid}/empatica_tags_features/empatica_tags_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["EMPATICA_TAGS"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower())) files_to_compute.extend(expand("data/raw/{pid}/empatica_tags_with_datetime.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/processed/features/{pid}/empatica_tags.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/interim/{pid}/empatica_tags_features/empatica_tags_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["EMPATICA_TAGS"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/processed/features/{pid}/empatica_tags.csv", pid=config["PIDS"]))
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv") files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
# Visualization for Data Exploration # Visualization for Data Exploration
if config["HISTOGRAM_PHONE_DATA_YIELD"]["PLOT"]: if config["HISTOGRAM_PHONE_DATA_YIELD"]["PLOT"]:

View File

@ -424,7 +424,7 @@ EMPATICA_DATA_CONFIGURATION:
# See https://www.rapids.science/latest/features/fitbit-heartrate-summary/ # See https://www.rapids.science/latest/features/fitbit-heartrate-summary/
EMPATICA_ACCELEROMETER: EMPATICA_ACCELEROMETER:
TABLE: acc TABLE: ACC
PROVIDERS: PROVIDERS:
DBDP: DBDP:
COMPUTE: False COMPUTE: False
@ -433,7 +433,7 @@ EMPATICA_ACCELEROMETER:
SRC_LANGUAGE: "python" SRC_LANGUAGE: "python"
EMPATICA_HEARTRATE: EMPATICA_HEARTRATE:
TABLE: hr TABLE: HR
PROVIDERS: PROVIDERS:
DBDP: DBDP:
COMPUTE: False COMPUTE: False
@ -442,7 +442,7 @@ EMPATICA_HEARTRATE:
SRC_LANGUAGE: "python" SRC_LANGUAGE: "python"
EMPATICA_TEMPERATURE: EMPATICA_TEMPERATURE:
TABLE: temp TABLE: TEMP
PROVIDERS: PROVIDERS:
DBDP: DBDP:
COMPUTE: False COMPUTE: False
@ -451,40 +451,36 @@ EMPATICA_TEMPERATURE:
SRC_LANGUAGE: "python" SRC_LANGUAGE: "python"
EMPATICA_ELECTRODERMAL_ACTIVITY: EMPATICA_ELECTRODERMAL_ACTIVITY:
TABLE: eda TABLE: EDA
PROVIDERS: PROVIDERS:
DBDP: DBDP:
COMPUTE: False COMPUTE: False
FEATURES: ["maxeda", "mineda", "avgeda", "medianeda", "modeeda", "stdeda", "diffmaxmodeeda", "diffminmodeeda", "entropyeda"] FEATURES: ["maxeda", "mineda", "avgeda", "medianeda", "modeeda", "stdeda", "diffmaxmodeeda", "diffminmodeeda", "entropyeda"]
SRC_FOLDER: "dbdp" # inside src/features/empatica_heartrate SRC_FOLDER: "dbdp" # inside src/features/empatica_electrodermal_activity
SRC_LANGUAGE: "python" SRC_LANGUAGE: "python"
EMPATICA_BLOOD_VOLUME_PULSE: EMPATICA_BLOOD_VOLUME_PULSE:
TABLE: bvp TABLE: BVP
PROVIDERS: PROVIDERS:
DBDP: DBDP:
COMPUTE: False COMPUTE: False
FEATURES: ["maxbvp", "minbvp", "avgbvp", "medianbvp", "modebvp", "stdbvp", "diffmaxmodebvp", "diffminmodebvp", "entropybvp"] FEATURES: ["maxbvp", "minbvp", "avgbvp", "medianbvp", "modebvp", "stdbvp", "diffmaxmodebvp", "diffminmodebvp", "entropybvp"]
SRC_FOLDER: "dbdp" # inside src/features/empatica_heartrate SRC_FOLDER: "dbdp" # inside src/features/empatica_blood_volume_pulse
SRC_LANGUAGE: "python" SRC_LANGUAGE: "python"
EMPATICA_INTER_BEAT_INTERVAL: EMPATICA_INTER_BEAT_INTERVAL:
TABLE: ibi TABLE: IBI
PROVIDERS: PROVIDERS:
DBDP: DBDP:
COMPUTE: False COMPUTE: False
FEATURES: ["maxibi", "minibi", "avgibi", "medianibi", "modeibi", "stdibi", "diffmaxmodeibi", "diffminmodeibi", "entropyibi"] FEATURES: ["maxibi", "minibi", "avgibi", "medianibi", "modeibi", "stdibi", "diffmaxmodeibi", "diffminmodeibi", "entropyibi"]
SRC_FOLDER: "dbdp" # inside src/features/empatica_heartrate SRC_FOLDER: "dbdp" # inside src/features/empatica_inter_beat_interval
SRC_LANGUAGE: "python" SRC_LANGUAGE: "python"
EMPATICA_TAGS: EMPATICA_TAGS:
TABLE: tags TABLE: TAGS
PROVIDERS: PROVIDERS: # None implemented yet
DBDP:
COMPUTE: False
FEATURES: []
SRC_FOLDER: "dbdp" # inside src/features/empatica_heartrate
SRC_LANGUAGE: "python"
######################################################################################################################## ########################################################################################################################
# PLOTS # # PLOTS #

View File

@ -31,16 +31,11 @@ def get_phone_sensor_names():
return phone_sensor_names return phone_sensor_names
from pathlib import Path from pathlib import Path
import re
def get_zip_suffixes(pid): def get_zip_suffixes(pid):
zipfiles = list(Path("data/external/empatica").rglob(pid+"*.zip")) zipfiles = list((Path("data/external/empatica/") / Path(pid)).rglob("*.zip"))
suffixes = [] suffixes = []
pattern = re.compile("{}(.*)".format(pid))
for zipfile in zipfiles: for zipfile in zipfiles:
name = zipfile.stem suffixes.append(zipfile.stem)
results = pattern.search(name)
suffixes.append(results.group(1))
return suffixes return suffixes
def get_all_raw_empatica_sensor_files(wildcards): def get_all_raw_empatica_sensor_files(wildcards):

View File

@ -246,7 +246,7 @@ rule fitbit_readable_datetime:
from pathlib import Path from pathlib import Path
rule unzip_empatica_data: rule unzip_empatica_data:
input: input:
input_file = Path(config["EMPATICA_DATA_CONFIGURATION"]["SOURCE"]["FOLDER"]) / Path("{pid}{suffix}.zip"), input_file = Path(config["EMPATICA_DATA_CONFIGURATION"]["SOURCE"]["FOLDER"]) / Path("{pid}") / Path("{suffix}.zip"),
participant_file = "data/external/participant_files/{pid}.yaml" participant_file = "data/external/participant_files/{pid}.yaml"
params: params:
sensor = "{sensor}" sensor = "{sensor}"

View File

@ -1,21 +0,0 @@
import pandas as pd
import numpy as np
def dbdp_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs):
sensor_data = pd.read_csv(sensor_data_files["sensor_data"])
requested_features = provider["FEATURES"]
# name of the features this function can compute
base_features_names = [] # ["maxmagnitude", "minmagnitude", "avgmagnitude", "medianmagnitude", "stdmagnitude"]
# the subset of requested features this function can compute
features_to_compute = list(set(requested_features) & set(base_features_names))
features = pd.DataFrame(columns=["local_segment"] + features_to_compute)
if not sensor_data.empty:
sensor_data = filter_data_by_segment(sensor_data, time_segment)
if not sensor_data.empty:
features = pd.DataFrame()
return features