diff --git a/Snakefile b/Snakefile index 475f21c4..b06cd351 100644 --- a/Snakefile +++ b/Snakefile @@ -294,12 +294,12 @@ for provider in config["EMPATICA_ACCELEROMETER"]["PROVIDERS"].keys(): suffixes = get_zip_suffixes(pid) files_to_compute.extend(expand("data/raw/{pid}/empatica_accelerometer_unzipped_{suffix}.csv", pid=pid, suffix=suffixes)) files_to_compute.extend(expand("data/raw/{pid}/empatica_accelerometer_raw_{suffix}.csv", pid=pid, suffix=suffixes)) - # files_to_compute.extend(expand("data/raw/{pid}/empatica_accelerometer_joined.csv", pid=config["PIDS"])) - # files_to_compute.extend(expand("data/raw/{pid}/empatica_accelerometer_with_datetime.csv", pid=config["PIDS"])) - # files_to_compute.extend(expand("data/interim/{pid}/empatica_accelerometer_features/empatica_accelerometer_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["EMPATICA_ACCELEROMETER"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower())) - # files_to_compute.extend(expand("data/processed/features/{pid}/empatica_accelerometer.csv", pid=config["PIDS"])) - # files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"])) - # files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv") + files_to_compute.extend(expand("data/raw/{pid}/empatica_accelerometer_joined.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/raw/{pid}/empatica_accelerometer_with_datetime.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/interim/{pid}/empatica_accelerometer_features/empatica_accelerometer_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["EMPATICA_ACCELEROMETER"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower())) + files_to_compute.extend(expand("data/processed/features/{pid}/empatica_accelerometer.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"])) + files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv") for provider in config["EMPATICA_HEARTRATE"]["PROVIDERS"].keys(): if config["EMPATICA_HEARTRATE"]["PROVIDERS"][provider]["COMPUTE"]: diff --git a/config.yaml b/config.yaml index 4fd977c2..ca830600 100644 --- a/config.yaml +++ b/config.yaml @@ -427,8 +427,8 @@ EMPATICA_ACCELEROMETER: TABLE: acc PROVIDERS: DBDP: - COMPUTE: True - FEATURES: [] + COMPUTE: False + FEATURES: ["maxmagnitude", "minmagnitude", "avgmagnitude", "medianmagnitude", "stdmagnitude"] SRC_FOLDER: "dbdp" # inside src/features/empatica_accelerometer SRC_LANGUAGE: "python" diff --git a/src/features/empatica_accelerometer/dbdp/main.py b/src/features/empatica_accelerometer/dbdp/main.py index 82da2f3a..e95636c4 100644 --- a/src/features/empatica_accelerometer/dbdp/main.py +++ b/src/features/empatica_accelerometer/dbdp/main.py @@ -3,19 +3,34 @@ import numpy as np def dbdp_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs): - sensor_data = pd.read_csv(sensor_data_files["sensor_data"]) + acc_data = pd.read_csv(sensor_data_files["sensor_data"]) requested_features = provider["FEATURES"] # name of the features this function can compute - base_features_names = [] # ["maxmagnitude", "minmagnitude", "avgmagnitude", "medianmagnitude", "stdmagnitude"] + base_features_names = ["maxmagnitude", "minmagnitude", "avgmagnitude", "medianmagnitude", "stdmagnitude"] # the subset of requested features this function can compute features_to_compute = list(set(requested_features) & set(base_features_names)) - features = pd.DataFrame(columns=["local_segment"] + features_to_compute) - if not sensor_data.empty: - sensor_data = filter_data_by_segment(sensor_data, time_segment) + acc_features = pd.DataFrame(columns=["local_segment"] + features_to_compute) + if not acc_data.empty: + acc_data = filter_data_by_segment(acc_data, time_segment) - if not sensor_data.empty: - features = pd.DataFrame() + if not acc_data.empty: + acc_features = pd.DataFrame() + # get magnitude related features: magnitude = sqrt(x^2+y^2+z^2) + magnitude = acc_data.apply(lambda row: np.sqrt(row["x"] ** 2 + row["y"] ** 2 + row["z"] ** 2), axis=1) + acc_data = acc_data.assign(magnitude = magnitude.values) + if "maxmagnitude" in features_to_compute: + acc_features["maxmagnitude"] = acc_data.groupby(["local_segment"])["magnitude"].max() + if "minmagnitude" in features_to_compute: + acc_features["minmagnitude"] = acc_data.groupby(["local_segment"])["magnitude"].min() + if "avgmagnitude" in features_to_compute: + acc_features["avgmagnitude"] = acc_data.groupby(["local_segment"])["magnitude"].mean() + if "medianmagnitude" in features_to_compute: + acc_features["medianmagnitude"] = acc_data.groupby(["local_segment"])["magnitude"].median() + if "stdmagnitude" in features_to_compute: + acc_features["stdmagnitude"] = acc_data.groupby(["local_segment"])["magnitude"].std() + + acc_features = acc_features.reset_index() - return features \ No newline at end of file + return acc_features \ No newline at end of file