From 6045df494dd936afb84bef02fb3d9ecb4623c0cd Mon Sep 17 00:00:00 2001
From: Mingze Cao <29229557+Martinze@users.noreply.github.com>
Date: Thu, 2 Apr 2020 16:36:28 -0500
Subject: [PATCH] =?UTF-8?q?Modify=20the=20Accelerometer=20=E2=80=98Metrics?=
 =?UTF-8?q?=E2=80=99=20to=20Accelerometer=20=E2=80=98Features=E2=80=99=20C?=
 =?UTF-8?q?o-authored-by:=20Meng=20Li=20=20Co-autho?=
 =?UTF-8?q?red-by:=20JulioV=20?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 config.yaml                            |  2 +-
 docs/features/extracted.rst            | 14 ++---
 rules/features.snakefile               |  6 +-
 src/features/accelerometer_features.py | 87 ++++++++++++++++++++++++++
 4 files changed, 98 insertions(+), 11 deletions(-)
 create mode 100644 src/features/accelerometer_features.py

diff --git a/config.yaml b/config.yaml
index cfced08e..bd32770b 100644
--- a/config.yaml
+++ b/config.yaml
@@ -98,7 +98,7 @@ LIGHT:
 
 ACCELEROMETER:
     DAY_SEGMENTS: *day_segments
-    METRICS: ["maxmagnitude", "minmagnitude", "avgmagnitude", "medianmagnitude", "stdmagnitude", "ratioexertionalactivityepisodes", "sumexertionalactivityepisodes", "longestexertionalactivityepisode", "longestnonexertionalactivityepisode", "countexertionalactivityepisodes", "countnonexertionalactivityepisodes"]
+    FEATURES: ["maxmagnitude", "minmagnitude", "avgmagnitude", "medianmagnitude", "stdmagnitude", "ratioexertionalactivityepisodes", "sumexertionalactivityepisodes", "longestexertionalactivityepisode", "longestnonexertionalactivityepisode", "countexertionalactivityepisodes", "countnonexertionalactivityepisodes"]
 
 APPLICATIONS_FOREGROUND:
     DAY_SEGMENTS: *day_segments
diff --git a/docs/features/extracted.rst b/docs/features/extracted.rst
index f2b5ac68..9a2c4ccd 100644
--- a/docs/features/extracted.rst
+++ b/docs/features/extracted.rst
@@ -365,7 +365,7 @@ See `Accelerometer Config Code`_
 ..
 
 - Apply readable datetime to Accelerometer dataset: ``expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["SENSORS"]),``
-- Extract Calls Metrics
+- Extract Accelerometer Features
 
 | ``expand("data/processed/{pid}/accelerometer_{day_segment}.csv",``
 | ``pid=config["PIDS"],``
@@ -381,9 +381,9 @@ See `Accelerometer Config Code`_
 
     - **Script:** ``src/data/readable_datetime.R`` - See the readable_datetime.R_ script.
 
-- **Rule:** ``rules/features.snakefile/accelerometer_metrics`` - See the accelerometer_metrics_ rule.
+- **Rule:** ``rules/features.snakefile/accelerometer_features`` - See the accelerometer_features_ rule.
 
-    - **Script:** ``src/features/accelerometer_metrics.py`` - See the accelerometer_metrics.py_ script.
+    - **Script:** ``src/features/accelerometer_features.py`` - See the accelerometer_features.py_ script.
 
 
 .. _Accelerometer-parameters:
@@ -394,14 +394,14 @@ See `Accelerometer Config Code`_
 Name         Description
 ============ ===================
 day_segment  The particular ``day_segments`` that will be analyzed. The available options are ``daily``, ``morning``, ``afternoon``, ``evening``, ``night``
-metrics      The different measures that can be retrieved from the dataset. See :ref:`Available Accelerometer Metrics <accelerometer-available-metrics>` Table below
+features     The different measures that can be retrieved from the dataset. See :ref:`Available Accelerometer Features <accelerometer-available-features>` Table below
 ============ ===================
 
-.. _accelerometer-available-metrics:
+.. _accelerometer-available-features:
 
-**Available Accelerometer Metrics**
+**Available Accelerometer Features**
 
-The following table shows a list of the available metrics the accelerometer sensor data for a particular ``day_segment``.
+The following table shows a list of the available features of the accelerometer sensor data for a particular ``day_segment``.
 
 ==================================== ============== =============
 Name                                 Units          Description
diff --git a/rules/features.snakefile b/rules/features.snakefile
index ced22596..d793f75b 100644
--- a/rules/features.snakefile
+++ b/rules/features.snakefile
@@ -122,16 +122,16 @@ rule light_metrics:
     script:
         "../src/features/light_metrics.py"
 
-rule accelerometer_metrics:
+rule accelerometer_features:
     input:
         "data/raw/{pid}/accelerometer_with_datetime.csv",
     params:
         day_segment = "{day_segment}",
-        metrics = config["ACCELEROMETER"]["METRICS"],
+        features = config["ACCELEROMETER"]["FEATURES"],
     output:
         "data/processed/{pid}/accelerometer_{day_segment}.csv"
     script:
-        "../src/features/accelerometer_metrics.py"
+        "../src/features/accelerometer_features.py"
 
 rule applications_foreground_metrics:
     input:
diff --git a/src/features/accelerometer_features.py b/src/features/accelerometer_features.py
new file mode 100644
index 00000000..7d486b29
--- /dev/null
+++ b/src/features/accelerometer_features.py
@@ -0,0 +1,87 @@
+import pandas as pd
+import numpy as np
+
+def getActivityEpisodes(acc_minute, activity_type):
+    col_name = ["nonexertional_episodes", "exertional_episodes"][activity_type]
+
+    # rebuild local date time for resampling
+    acc_minute["local_datetime"] = pd.to_datetime(acc_minute["local_date"].dt.strftime("%Y-%m-%d") + \
+        " " + acc_minute["local_hour"].apply(str) + ":" + acc_minute["local_minute"].apply(str) + ":00")
+    # resample the data into 1 minute bins
+    resampled_acc_minute = pd.DataFrame(acc_minute.resample("1T", on="local_datetime")["isexertionalactivity"].sum())
+
+    if activity_type == 0:
+        resampled_acc_minute["isexertionalactivity"] = resampled_acc_minute["isexertionalactivity"] * (-1) + 1
+
+    # get the longest episode of exertional/non-exertional activity given as consecutive one minute periods
+    resampled_acc_minute['consecutive'] = resampled_acc_minute["isexertionalactivity"].groupby((resampled_acc_minute["isexertionalactivity"] != resampled_acc_minute["isexertionalactivity"].shift()).cumsum()).transform('size') * resampled_acc_minute["isexertionalactivity"]
+    longest_activity_episodes = resampled_acc_minute.groupby(pd.Grouper(freq='D'))[["consecutive"]].max().rename(columns = {"consecutive": col_name})
+
+    # get the count of exertional/non-exertional activity episodes
+    resampled_acc_minute_shift = resampled_acc_minute.loc[resampled_acc_minute["consecutive"].shift() != resampled_acc_minute["consecutive"]]
+    count_activity_episodes = resampled_acc_minute_shift.groupby(pd.Grouper(freq='D'))[["consecutive"]].apply(lambda x: np.count_nonzero(x)).to_frame(name = col_name)
+
+    return longest_activity_episodes, count_activity_episodes
+
+def dropRowsWithCertainThreshold(data, threshold):
+    data_grouped = data.groupby(["local_date", "local_hour", "local_minute"]).count()
+    drop_dates = data_grouped[data_grouped["timestamp"] == threshold].index
+    data.set_index(["local_date", "local_hour", "local_minute"], inplace = True)
+    if not drop_dates.empty:
+        data.drop(drop_dates, axis = 0, inplace = True)
+    return data.reset_index()
+
+
+acc_data = pd.read_csv(snakemake.input[0], parse_dates=["local_date_time", "local_date"])
+day_segment = snakemake.params["day_segment"]
+features = snakemake.params["features"]
+
+acc_features = pd.DataFrame(columns=["local_date"] + ["acc_" + day_segment + "_" + x for x in features])
+if not acc_data.empty:
+    if day_segment != "daily":
+        acc_data = acc_data[acc_data["local_day_segment"] == day_segment]
+    if not acc_data.empty:
+        acc_features = pd.DataFrame()
+        # get magnitude related features: magnitude = sqrt(x^2+y^2+z^2)
+        acc_data["magnitude"] = (acc_data["double_values_0"] ** 2 + acc_data["double_values_1"] ** 2 + acc_data["double_values_2"] ** 2).apply(np.sqrt)
+        if "maxmagnitude" in features:
+            acc_features["acc_" + day_segment + "_maxmagnitude"] = acc_data.groupby(["local_date"])["magnitude"].max()
+        if "minmagnitude" in features:
+            acc_features["acc_" + day_segment + "_minmagnitude"] = acc_data.groupby(["local_date"])["magnitude"].min()
+        if "avgmagnitude" in features:
+            acc_features["acc_" + day_segment + "_avgmagnitude"] = acc_data.groupby(["local_date"])["magnitude"].mean()
+        if "medianmagnitude" in features:
+            acc_features["acc_" + day_segment + "_medianmagnitude"] = acc_data.groupby(["local_date"])["magnitude"].median()
+        if "stdmagnitude" in features:
+            acc_features["acc_" + day_segment + "_stdmagnitude"] = acc_data.groupby(["local_date"])["magnitude"].std()
+
+        # get exertional activity features
+        # reference: https://jamanetwork.com/journals/jamasurgery/fullarticle/2753807
+
+        # drop rows where we only have one row per minute (no variance)
+        acc_data = dropRowsWithCertainThreshold(acc_data, 1)
+        if not acc_data.empty:
+            # check if the participant performs exertional activity for each minute
+            acc_minute = pd.DataFrame()
+            acc_minute["isexertionalactivity"] = (acc_data.groupby(["local_date", "local_hour", "local_minute"])["double_values_0"].var() + acc_data.groupby(["local_date", "local_hour", "local_minute"])["double_values_1"].var() + acc_data.groupby(["local_date", "local_hour", "local_minute"])["double_values_2"].var()).apply(lambda x: 1 if x > 0.15 * (9.807 ** 2) else 0)
+            acc_minute.reset_index(inplace=True)
+
+            if "ratioexertionalactivityepisodes" in features:
+                acc_features["acc_" + day_segment + "_ratioexertionalactivityepisodes"] = acc_minute.groupby(["local_date"])["isexertionalactivity"].sum()/acc_minute.groupby(["local_date"])["isexertionalactivity"].count()
+            if "sumexertionalactivityepisodes" in features:
+                acc_features["acc_" + day_segment + "_sumexertionalactivityepisodes"] = acc_minute.groupby(["local_date"])["isexertionalactivity"].sum()
+
+            longest_exertionalactivity_episodes, count_exertionalactivity_episodes = getActivityEpisodes(acc_minute, 1)
+            longest_nonexertionalactivity_episodes, count_nonexertionalactivity_episodes = getActivityEpisodes(acc_minute, 0)
+            if "longestexertionalactivityepisode" in features:
+                acc_features["acc_" + day_segment + "_longestexertionalactivityepisode"] = longest_exertionalactivity_episodes["exertional_episodes"]
+            if "longestnonexertionalactivityepisode" in features:
+                acc_features["acc_" + day_segment + "_longestnonexertionalactivityepisode"] = longest_nonexertionalactivity_episodes["nonexertional_episodes"]
+            if "countexertionalactivityepisodes" in features:
+                acc_features["acc_" + day_segment + "_countexertionalactivityepisodes"] = count_exertionalactivity_episodes["exertional_episodes"]
+            if "countnonexertionalactivityepisodes" in features:
+                acc_features["acc_" + day_segment + "_countnonexertionalactivityepisodes"] = count_nonexertionalactivity_episodes["nonexertional_episodes"]
+
+        acc_features = acc_features.reset_index()
+
+acc_features.to_csv(snakemake.output[0], index=False)
\ No newline at end of file
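
Note for reviewers: the per-minute exertional flag in accelerometer_features.py sums the within-minute variance of the three axes and compares it against 0.15 * g^2 with g = 9.807 m/s^2, following the JAMA Surgery paper referenced in the script, and the magnitude features are daily aggregates of sqrt(x^2 + y^2 + z^2). Below is a minimal sketch of both calculations on made-up samples (the acc_sample frame and its values are hypothetical; only the column names mirror the patch).

    import numpy as np
    import pandas as pd

    # Hypothetical accelerometer samples within one minute; the column names
    # mirror the patch (double_values_0/1/2 are the x/y/z axes in m/s^2).
    acc_sample = pd.DataFrame({
        "double_values_0": [0.1, 6.0, -5.5, 3.2],
        "double_values_1": [9.6, 7.1, 10.2, 8.8],
        "double_values_2": [0.3, -2.0, 1.5, -0.7],
    })

    # Magnitude per sample, as in the patch: sqrt(x^2 + y^2 + z^2); the daily
    # max/min/avg/median/std of this series become the magnitude features.
    magnitude = np.sqrt((acc_sample ** 2).sum(axis=1))
    print(magnitude.max(), magnitude.median())

    # Exertional-minute test: summed per-axis variance above 0.15 * g^2 (~14.4).
    g = 9.807
    is_exertional = int(acc_sample.var().sum() > 0.15 * g ** 2)
    print(is_exertional)  # 1 for this (deliberately jumpy) made-up minute

Minutes containing a single sample are removed first via dropRowsWithCertainThreshold, since the sample variance of one row is undefined (NaN) and such minutes would otherwise always be flagged as non-exertional.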
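
Note for reviewers: the least obvious part of getActivityEpisodes is the run-length construction of the "consecutive" column. The sketch below applies the same shift/cumsum/transform('size') pattern to a hypothetical minute-level 0/1 series (the index and values are invented) and shows how the longest daily episode and the daily episode count fall out of it.

    import numpy as np
    import pandas as pd

    # Hypothetical per-minute flags: 1 = exertional minute, 0 = not.
    idx = pd.date_range("2020-04-02 08:00", periods=8, freq="1T")
    flags = pd.DataFrame({"isexertionalactivity": [1, 1, 0, 1, 1, 1, 0, 0]}, index=idx)

    # Run length of the streak each minute belongs to, zeroed out for 0-minutes
    # (the same shift/cumsum/transform pattern used in the patch).
    streak_id = (flags["isexertionalactivity"] != flags["isexertionalactivity"].shift()).cumsum()
    flags["consecutive"] = (flags["isexertionalactivity"].groupby(streak_id).transform("size")
                            * flags["isexertionalactivity"])

    # Longest exertional episode per day -> 3 minutes in this toy series.
    longest = flags.groupby(pd.Grouper(freq="D"))["consecutive"].max()

    # Episode count per day: keep the first minute of every streak, then count
    # the nonzero run lengths -> 2 episodes here.
    starts = flags.loc[flags["consecutive"].shift() != flags["consecutive"]]
    count = starts.groupby(pd.Grouper(freq="D"))["consecutive"].apply(np.count_nonzero)

    print(longest.iloc[0], count.iloc[0])  # 3 2

For non-exertional episodes the patch simply inverts the 0/1 column (multiply by -1 and add 1) and reuses the same computation.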