From 29dcd1f284f71481cac28134c527f9f4a5a9e2c1 Mon Sep 17 00:00:00 2001
From: Meng Li <34143965+Meng6@users.noreply.github.com>
Date: Tue, 13 Oct 2020 11:42:36 -0400
Subject: [PATCH] Add accelerometer features of RAPIDS provider

---
Review note: main.py was revised after the original commit (deterministic column
order, vectorized magnitude, "local_segment" always returned), so the commit id
above and the main.py blob id below are the original ones and were not recomputed.

 Snakefile                                 | 10 ++++---
 config.yaml                               | 14 ++++-----
 rules/features.smk                        | 30 +++++++++++++------
 src/features/accelerometer/rapids/main.py | 36 +++++++++++++++++++++++
 4 files changed, 69 insertions(+), 21 deletions(-)
 create mode 100644 src/features/accelerometer/rapids/main.py

diff --git a/Snakefile b/Snakefile
index dc023282..e95d14ab 100644
--- a/Snakefile
+++ b/Snakefile
@@ -104,10 +104,12 @@ for provider in config["LIGHT"]["PROVIDERS"].keys():
         files_to_compute.extend(expand("data/interim/{pid}/{sensor_key}_features/{sensor_key}_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["LIGHT"]["PROVIDERS"][provider]["SRC_LANGUAGE"], provider_key=provider, sensor_key="LIGHT".lower()))
         files_to_compute.extend(expand("data/processed/features/{pid}/{sensor_key}.csv", pid=config["PIDS"], sensor_key="LIGHT".lower()))
 
-if config["ACCELEROMETER"]["COMPUTE"]:
-    files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["ACCELEROMETER"]["DB_TABLE"]))
-    files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["ACCELEROMETER"]["DB_TABLE"]))
-    files_to_compute.extend(expand("data/processed/{pid}/accelerometer_{day_segment}.csv", pid = config["PIDS"], day_segment = config["ACCELEROMETER"]["DAY_SEGMENTS"]))
+for provider in config["ACCELEROMETER"]["PROVIDERS"].keys():
+    if config["ACCELEROMETER"]["PROVIDERS"][provider]["COMPUTE"]:
+        files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["ACCELEROMETER"]["DB_TABLE"]))
+        files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["ACCELEROMETER"]["DB_TABLE"]))
+        files_to_compute.extend(expand("data/interim/{pid}/{sensor_key}_features/{sensor_key}_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["ACCELEROMETER"]["PROVIDERS"][provider]["SRC_LANGUAGE"], provider_key=provider, sensor_key="ACCELEROMETER".lower()))
+        files_to_compute.extend(expand("data/processed/features/{pid}/{sensor_key}.csv", pid=config["PIDS"], sensor_key="ACCELEROMETER".lower()))
 
 for provider in config["APPLICATIONS_FOREGROUND"]["PROVIDERS"].keys():
     if config["APPLICATIONS_FOREGROUND"]["PROVIDERS"][provider]["COMPUTE"]:
diff --git a/config.yaml b/config.yaml
index a852c126..62f89236 100644
--- a/config.yaml
+++ b/config.yaml
@@ -158,16 +158,14 @@ LIGHT:
       SRC_FOLDER: "rapids" # inside src/features/light
       SRC_LANGUAGE: "python"
 
-
 ACCELEROMETER:
-  COMPUTE: False
   DB_TABLE: accelerometer
-  DAY_SEGMENTS: *day_segments
-  FEATURES:
-    MAGNITUDE: ["maxmagnitude", "minmagnitude", "avgmagnitude", "medianmagnitude", "stdmagnitude"]
-    EXERTIONAL_ACTIVITY_EPISODE: ["sumduration", "maxduration", "minduration", "avgduration", "medianduration", "stdduration"]
-    NONEXERTIONAL_ACTIVITY_EPISODE: ["sumduration", "maxduration", "minduration", "avgduration", "medianduration", "stdduration"]
-    VALID_SENSED_MINUTES: False
+  PROVIDERS:
+    RAPIDS:
+      COMPUTE: False
+      FEATURES: ["maxmagnitude", "minmagnitude", "avgmagnitude", "medianmagnitude", "stdmagnitude"]
+      SRC_FOLDER: "rapids" # inside src/features/accelerometer
+      SRC_LANGUAGE: "python"
 
 APPLICATIONS_FOREGROUND:
   DB_TABLE: applications_foreground
diff --git a/rules/features.smk b/rules/features.smk
index 2e8424ad..7945d62d 100644
--- a/rules/features.smk
+++ b/rules/features.smk
@@ -28,19 +28,31 @@ rule resample_episodes_with_datetime:
     script:
         "../src/data/readable_datetime.R"
 
-rule accelerometer_features:
+rule accelerometer_r_features:
     input:
-        expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["ACCELEROMETER"]["DB_TABLE"]),
+        sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["ACCELEROMETER"]["DB_TABLE"])[0],
+        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
     params:
-        day_segment = "{day_segment}",
-        magnitude = config["ACCELEROMETER"]["FEATURES"]["MAGNITUDE"],
-        exertional_activity_episode = config["ACCELEROMETER"]["FEATURES"]["EXERTIONAL_ACTIVITY_EPISODE"],
-        nonexertional_activity_episode = config["ACCELEROMETER"]["FEATURES"]["NONEXERTIONAL_ACTIVITY_EPISODE"],
-        valid_sensed_minutes = config["ACCELEROMETER"]["FEATURES"]["VALID_SENSED_MINUTES"],
+        provider = lambda wildcards: config["ACCELEROMETER"]["PROVIDERS"][wildcards.provider_key],
+        provider_key = "{provider_key}",
+        sensor_key = "accelerometer"
     output:
-        "data/processed/{pid}/accelerometer_{day_segment}.csv"
+        "data/interim/{pid}/accelerometer_features/accelerometer_r_{provider_key}.csv"
     script:
-        "../src/features/accelerometer_features.py"
+        "../src/features/entry.R"
+
+rule accelerometer_python_features:
+    input:
+        sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["ACCELEROMETER"]["DB_TABLE"])[0],
+        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
+    params:
+        provider = lambda wildcards: config["ACCELEROMETER"]["PROVIDERS"][wildcards.provider_key],
+        provider_key = "{provider_key}",
+        sensor_key = "accelerometer"
+    output:
+        "data/interim/{pid}/accelerometer_features/accelerometer_python_{provider_key}.csv"
+    script:
+        "../src/features/entry.py"
 
 rule activity_recognition_episodes:
     input:
diff --git a/src/features/accelerometer/rapids/main.py b/src/features/accelerometer/rapids/main.py
new file mode 100644
index 00000000..09920343
--- /dev/null
+++ b/src/features/accelerometer/rapids/main.py
@@ -0,0 +1,36 @@
+import pandas as pd
+import numpy as np
+
+def rapids_features(sensor_data_files, day_segment, provider, filter_data_by_segment, *args, **kwargs):
+    """Compute accelerometer magnitude features per local segment (RAPIDS provider).
+
+    Reads the CSV at sensor_data_files["sensor_data"], restricts it with
+    filter_data_by_segment(acc_data, day_segment) and aggregates the magnitude of
+    (double_values_0, double_values_1, double_values_2) within each "local_segment".
+    Returns a DataFrame with a "local_segment" column plus one "acc_rapids_<feature>"
+    column per supported feature listed in provider["FEATURES"]; it has no rows when
+    there is no data for the segment.
+    """
+    acc_data = pd.read_csv(sensor_data_files["sensor_data"])
+    requested_features = provider["FEATURES"]
+    # features this function can compute, mapped to their pandas aggregation
+    base_features = {"maxmagnitude": "max", "minmagnitude": "min", "avgmagnitude": "mean", "medianmagnitude": "median", "stdmagnitude": "std"}
+    # the subset of requested features this function can compute; iterating the dict
+    # (not a set intersection) keeps the output column order deterministic
+    features_to_compute = [x for x in base_features if x in requested_features]
+
+    acc_features = pd.DataFrame(columns=["local_segment"] + ["acc_rapids_" + x for x in features_to_compute])
+    if not acc_data.empty:
+        acc_data = filter_data_by_segment(acc_data, day_segment)
+
+        if not acc_data.empty:
+            # magnitude = sqrt(x^2+y^2+z^2), computed on whole columns instead of row by row
+            x, y, z = (acc_data["double_values_" + str(axis)] for axis in range(3))
+            grouped = acc_data.assign(magnitude = np.sqrt(x ** 2 + y ** 2 + z ** 2)).groupby(["local_segment"])["magnitude"]
+
+            # seed with the segment index so "local_segment" is returned even without features
+            acc_features = pd.DataFrame(index=grouped.size().index)
+            for feature in features_to_compute:
+                acc_features["acc_rapids_" + feature] = grouped.agg(base_features[feature])
+            acc_features = acc_features.reset_index()
+    return acc_features