Add heartrate features
parent
6c5ace5611
commit
770764ec8a
|
@ -44,6 +44,9 @@ rule all:
|
||||||
expand("data/raw/{pid}/fitbit_{fitbit_sensor}_with_datetime.csv",
|
expand("data/raw/{pid}/fitbit_{fitbit_sensor}_with_datetime.csv",
|
||||||
pid=config["PIDS"],
|
pid=config["PIDS"],
|
||||||
fitbit_sensor=config["FITBIT_SENSORS"]),
|
fitbit_sensor=config["FITBIT_SENSORS"]),
|
||||||
|
expand("data/processed/{pid}/fitbit_heartrate_{day_segment}.csv",
|
||||||
|
pid = config["PIDS"],
|
||||||
|
day_segment = config["HEARTRATE"]["DAY_SEGMENTS"]),
|
||||||
expand("data/processed/{pid}/fitbit_step_{day_segment}.csv",
|
expand("data/processed/{pid}/fitbit_step_{day_segment}.csv",
|
||||||
pid = config["PIDS"],
|
pid = config["PIDS"],
|
||||||
day_segment = config["STEP"]["DAY_SEGMENTS"]),
|
day_segment = config["STEP"]["DAY_SEGMENTS"]),
|
||||||
|
|
|
@ -72,7 +72,6 @@ BLUETOOTH:
|
||||||
GOOGLE_ACTIVITY_RECOGNITION:
|
GOOGLE_ACTIVITY_RECOGNITION:
|
||||||
DAY_SEGMENTS: *day_segments
|
DAY_SEGMENTS: *day_segments
|
||||||
METRICS: ['count','mostcommonactivity','countuniqueactivities','activitychangecount','sumstationary','summobile','sumvehicle']
|
METRICS: ['count','mostcommonactivity','countuniqueactivities','activitychangecount','sumstationary','summobile','sumvehicle']
|
||||||
|
|
||||||
|
|
||||||
BATTERY:
|
BATTERY:
|
||||||
DAY_SEGMENTS: *day_segments
|
DAY_SEGMENTS: *day_segments
|
||||||
|
@ -92,6 +91,10 @@ ACCELEROMETER:
|
||||||
DAY_SEGMENTS: *day_segments
|
DAY_SEGMENTS: *day_segments
|
||||||
METRICS: ["maxmagnitude", "minmagnitude", "avgmagnitude", "medianmagnitude", "stdmagnitude", "ratioexertionalactivityepisodes", "sumexertionalactivityepisodes", "longestexertionalactivityepisode", "longestnonexertionalactivityepisode", "countexertionalactivityepisodes", "countnonexertionalactivityepisodes"]
|
METRICS: ["maxmagnitude", "minmagnitude", "avgmagnitude", "medianmagnitude", "stdmagnitude", "ratioexertionalactivityepisodes", "sumexertionalactivityepisodes", "longestexertionalactivityepisode", "longestnonexertionalactivityepisode", "countexertionalactivityepisodes", "countnonexertionalactivityepisodes"]
|
||||||
|
|
||||||
|
# Settings for the Fitbit heart rate features.
# DAY_SEGMENTS reuses the shared `day_segments` anchor; one output file is
# produced per participant and per segment listed there.
# METRICS selects which feature columns are computed. The "length*" entries
# are the number of minutes spent in the matching heart rate zone (the zone
# name is the metric name without its "length" prefix).
HEARTRATE:
  DAY_SEGMENTS: *day_segments
  METRICS: ["maxhr", "minhr", "avghr", "medianhr", "modehr", "stdhr", "diffmaxmodehr", "diffminmodehr", "entropyhr", "lengthoutofrange", "lengthfatburn", "lengthcardio", "lengthpeak"]
|
||||||
|
|
||||||
STEP:
|
STEP:
|
||||||
DAY_SEGMENTS: *day_segments
|
DAY_SEGMENTS: *day_segments
|
||||||
METRICS:
|
METRICS:
|
||||||
|
|
|
@ -131,6 +131,17 @@ rule accelerometer_metrics:
|
||||||
script:
|
script:
|
||||||
"../src/features/accelerometer_metrics.py"
|
"../src/features/accelerometer_metrics.py"
|
||||||
|
|
||||||
|
# Compute per-day Fitbit heart rate features for one participant ({pid})
# and one day segment ({day_segment}).
rule fitbit_heartrate_metrics:
    # Raw heart rate samples; the script reads this as snakemake.input[0].
    input:
        "data/raw/{pid}/fitbit_heartrate_with_datetime.csv"
    # One CSV per participant and day segment.
    output:
        "data/processed/{pid}/fitbit_heartrate_{day_segment}.csv"
    # The day segment comes from the output wildcard; the metric list comes
    # from the HEARTRATE section of the config.
    params:
        day_segment = "{day_segment}",
        metrics = config["HEARTRATE"]["METRICS"]
    script:
        "../src/features/fitbit_heartrate_metrics.py"
|
||||||
|
|
||||||
rule fitbit_step_metrics:
|
rule fitbit_step_metrics:
|
||||||
input:
|
input:
|
||||||
steps_data = "data/raw/{pid}/fitbit_steps_with_datetime.csv",
|
steps_data = "data/raw/{pid}/fitbit_steps_with_datetime.csv",
|
||||||
|
|
|
@ -0,0 +1,49 @@
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
from scipy.stats import entropy
|
||||||
|
import json
|
||||||
|
|
||||||
|
|
||||||
|
# Metrics that count the minutes spent in a Fitbit heart rate zone. The zone
# label stored in the data is the metric name without its "length" prefix.
ZONE_METRICS = ("lengthoutofrange", "lengthfatburn", "lengthcardio", "lengthpeak")

# Metrics that need the per-day mode of the heart rate.
MODE_METRICS = ("modehr", "diffmaxmodehr", "diffminmodehr")


def _first_mode(values):
    """Return the smallest mode of *values*, or NaN when there is none.

    ``Series.mode`` returns every tied value (sorted), which cannot be stored
    in a single cell nor subtracted from a scalar; picking the first one keeps
    the feature a plain number.
    """
    modes = values.mode()
    return modes.iloc[0] if not modes.empty else np.nan


def compute_heartrate_features(heartrate_data, day_segment, metrics):
    """Build the per-day heart rate feature table.

    Parameters
    ----------
    heartrate_data : pandas.DataFrame
        Heart rate samples. The columns read here are ``device_id``,
        ``local_date``, ``local_hour``, ``local_minute``, ``heartrate``,
        ``heartrate_zone`` and, unless ``day_segment`` is ``"daily"``,
        ``local_day_segment``.
    day_segment : str
        ``"daily"`` to use every sample, otherwise only rows whose
        ``local_day_segment`` equals this value are used.
    metrics : list of str
        Names of the features to compute. Names that are not recognised are
        ignored when there is data to process.

    Returns
    -------
    pandas.DataFrame
        One row per ``local_date`` with a ``local_date`` column followed by one
        ``heartrate_<day_segment>_<metric>`` column per computed metric, in the
        order given by ``metrics``. When there are no samples (at all, or in
        the requested segment) the frame has no rows and only the header.
    """
    prefix = "heartrate_" + day_segment + "_"
    no_features = pd.DataFrame(columns=["local_date"] + [prefix + name for name in metrics])

    if heartrate_data.empty:
        return no_features

    # Sampling density is estimated on the whole recording, before the segment
    # filter, and is used to turn row counts into minutes.
    num_rows_per_minute = (
        heartrate_data.groupby(["local_date", "local_hour", "local_minute"])["device_id"]
        .count()
        .mean()
    )

    if day_segment != "daily":
        heartrate_data = heartrate_data[heartrate_data["local_day_segment"] == day_segment]
    if heartrate_data.empty:
        return no_features

    daily_hr = heartrate_data.groupby("local_date")["heartrate"]
    # Every day present in the (filtered) data gets a row, whatever metrics are
    # requested; all feature columns are aligned to this index.
    dates = daily_hr.size().index
    features = pd.DataFrame(index=dates)

    requested = set(metrics)
    daily_mode = daily_hr.agg(_first_mode) if requested.intersection(MODE_METRICS) else None

    statistics = {
        "maxhr": daily_hr.max,
        "minhr": daily_hr.min,
        "avghr": daily_hr.mean,
        "medianhr": daily_hr.median,
        "modehr": lambda: daily_mode,
        "stdhr": daily_hr.std,
        "diffmaxmodehr": lambda: daily_hr.max() - daily_mode,
        "diffminmodehr": lambda: daily_mode - daily_hr.min(),
        # NOTE(review): scipy's entropy normalises its input to sum to 1, so
        # the raw heart rate values themselves are treated as the distribution.
        "entropyhr": lambda: daily_hr.agg(entropy),
    }

    for metric in metrics:
        column = prefix + metric
        if metric in statistics:
            features[column] = statistics[metric]()
        elif metric in ZONE_METRICS:
            zone = metric[len("length"):]
            in_zone = heartrate_data[heartrate_data["heartrate_zone"] == zone]
            rows_per_day = in_zone.groupby("local_date")["device_id"].count()
            # Days without any sample in this zone spent 0 minutes in it.
            features[column] = rows_per_day.reindex(dates, fill_value=0) / num_rows_per_minute

    return features.reset_index()


# Snakemake runs this file as a script and injects the `snakemake` object; the
# guard keeps the I/O out of the way when the module is imported.
if __name__ == "__main__":
    heartrate_data = pd.read_csv(snakemake.input[0], parse_dates=["local_date_time", "local_date"])
    heartrate_features = compute_heartrate_features(
        heartrate_data,
        snakemake.params["day_segment"],
        snakemake.params["metrics"],
    )
    heartrate_features.to_csv(snakemake.output[0], index=False)
|
Loading…
Reference in New Issue