diff --git a/Snakefile b/Snakefile
index e98061fe..3e1647e9 100644
--- a/Snakefile
+++ b/Snakefile
@@ -44,6 +44,9 @@ rule all:
         expand("data/raw/{pid}/fitbit_{fitbit_sensor}_with_datetime.csv",
                 pid=config["PIDS"],
                 fitbit_sensor=config["FITBIT_SENSORS"]),
+        expand("data/processed/{pid}/fitbit_heartrate_{day_segment}.csv",
+                pid = config["PIDS"],
+                day_segment = config["HEARTRATE"]["DAY_SEGMENTS"]),
         expand("data/processed/{pid}/fitbit_step_{day_segment}.csv",
                 pid = config["PIDS"],
                 day_segment = config["STEP"]["DAY_SEGMENTS"]),
diff --git a/config.yaml b/config.yaml
index 377afd85..a30b8c4f 100644
--- a/config.yaml
+++ b/config.yaml
@@ -72,7 +72,6 @@ BLUETOOTH:
 GOOGLE_ACTIVITY_RECOGNITION:
   DAY_SEGMENTS: *day_segments
   METRICS: ['count','mostcommonactivity','countuniqueactivities','activitychangecount','sumstationary','summobile','sumvehicle']
 
-
 BATTERY:
   DAY_SEGMENTS: *day_segments
@@ -92,6 +91,10 @@ ACCELEROMETER:
   DAY_SEGMENTS: *day_segments
   METRICS: ["maxmagnitude", "minmagnitude", "avgmagnitude", "medianmagnitude", "stdmagnitude", "ratioexertionalactivityepisodes", "sumexertionalactivityepisodes", "longestexertionalactivityepisode", "longestnonexertionalactivityepisode", "countexertionalactivityepisodes", "countnonexertionalactivityepisodes"]
 
+HEARTRATE:
+  DAY_SEGMENTS: *day_segments
+  METRICS: ["maxhr", "minhr", "avghr", "medianhr", "modehr", "stdhr", "diffmaxmodehr", "diffminmodehr", "entropyhr", "lengthoutofrange", "lengthfatburn", "lengthcardio", "lengthpeak"]
+
 STEP:
   DAY_SEGMENTS: *day_segments
   METRICS:
diff --git a/rules/features.snakefile b/rules/features.snakefile
index b10d8ace..1681e915 100644
--- a/rules/features.snakefile
+++ b/rules/features.snakefile
@@ -131,6 +131,17 @@ rule accelerometer_metrics:
     script:
         "../src/features/accelerometer_metrics.py"
 
+rule fitbit_heartrate_metrics:
+    input:
+        "data/raw/{pid}/fitbit_heartrate_with_datetime.csv",
+    params:
+        day_segment = "{day_segment}",
+        metrics = config["HEARTRATE"]["METRICS"],
+    output:
+        "data/processed/{pid}/fitbit_heartrate_{day_segment}.csv"
+    script:
+        "../src/features/fitbit_heartrate_metrics.py"
+
 rule fitbit_step_metrics:
     input:
         steps_data = "data/raw/{pid}/fitbit_steps_with_datetime.csv",
diff --git a/src/features/fitbit_heartrate_metrics.py b/src/features/fitbit_heartrate_metrics.py
new file mode 100644
index 00000000..26f50917
--- /dev/null
+++ b/src/features/fitbit_heartrate_metrics.py
@@ -0,0 +1,76 @@
+"""Compute daily Fitbit heart rate features for one participant and day segment.
+
+Executed by Snakemake, which provides the global ``snakemake`` object holding
+the input path, the output path and the rule parameters.
+"""
+import pandas as pd
+import numpy as np
+from scipy.stats import entropy
+import json
+
+
+def first_mode(values):
+    """Return the smallest most-frequent value, or NaN if there is none."""
+    # Series.mode() returns every tied value (sorted); keep one scalar per group
+    # so the mode column and the max/min differences stay numeric.
+    modes = values.mode()
+    return modes.iloc[0] if not modes.empty else np.nan
+
+
+heartrate_data = pd.read_csv(snakemake.input[0], parse_dates=["local_date_time", "local_date"])
+day_segment = snakemake.params["day_segment"]
+metrics = snakemake.params["metrics"]
+
+column_prefix = "heartrate_" + day_segment + "_"
+# Fixed order keeps the output columns identical from run to run.
+zone_metrics = [x for x in ["lengthoutofrange", "lengthfatburn", "lengthcardio", "lengthpeak"] if x in metrics]
+
+# Header-only result, written when there is no data for this day segment.
+heartrate_features = pd.DataFrame(columns=["local_date"] + [column_prefix + x for x in metrics])
+if not heartrate_data.empty:
+    # Average number of rows per recorded minute, taken over the whole file
+    # (before filtering by segment); converts row counts into minutes below.
+    num_rows_per_minute = heartrate_data.groupby(["local_date", "local_hour", "local_minute"]).count().mean()["device_id"]
+    if day_segment != "daily":
+        heartrate_data = heartrate_data[heartrate_data["local_day_segment"] == day_segment]
+
+    if not heartrate_data.empty:
+        daily_heartrate = heartrate_data.groupby(["local_date"])["heartrate"]
+        # Index by every date that has data so no day is dropped, whichever
+        # metric happens to be computed first.
+        heartrate_features = pd.DataFrame(index=daily_heartrate.size().index)
+
+        if {"modehr", "diffmaxmodehr", "diffminmodehr"} & set(metrics):
+            daily_mode = daily_heartrate.agg(first_mode)
+
+        # get stats of heartrate
+        if "maxhr" in metrics:
+            heartrate_features[column_prefix + "maxhr"] = daily_heartrate.max()
+        if "minhr" in metrics:
+            heartrate_features[column_prefix + "minhr"] = daily_heartrate.min()
+        if "avghr" in metrics:
+            heartrate_features[column_prefix + "avghr"] = daily_heartrate.mean()
+        if "medianhr" in metrics:
+            heartrate_features[column_prefix + "medianhr"] = daily_heartrate.median()
+        if "modehr" in metrics:
+            heartrate_features[column_prefix + "modehr"] = daily_mode
+        if "stdhr" in metrics:
+            heartrate_features[column_prefix + "stdhr"] = daily_heartrate.std()
+        if "diffmaxmodehr" in metrics:
+            heartrate_features[column_prefix + "diffmaxmodehr"] = daily_heartrate.max() - daily_mode
+        if "diffminmodehr" in metrics:
+            heartrate_features[column_prefix + "diffminmodehr"] = daily_mode - daily_heartrate.min()
+        if "entropyhr" in metrics:
+            # scipy's entropy() rescales its input to sum to 1, so the raw samples are used as weights.
+            heartrate_features[column_prefix + "entropyhr"] = daily_heartrate.agg(entropy)
+
+        # get number of minutes in each heart rate zone
+        for feature_name in zone_metrics:
+            # "lengthcardio" -> "cardio": the label compared against heartrate_zone
+            heartrate_zone = heartrate_data[heartrate_data["heartrate_zone"] == feature_name[6:]]
+            # Days with no rows in the zone are left as NaN by index alignment.
+            heartrate_features[column_prefix + feature_name] = heartrate_zone.groupby(["local_date"])["device_id"].count() / num_rows_per_minute
+
+        heartrate_features = heartrate_features.reset_index()
+
+heartrate_features.to_csv(snakemake.output[0], index=False)