Add heartrate features
parent
6c5ace5611
commit
770764ec8a
|
@ -44,6 +44,9 @@ rule all:
|
|||
expand("data/raw/{pid}/fitbit_{fitbit_sensor}_with_datetime.csv",
|
||||
pid=config["PIDS"],
|
||||
fitbit_sensor=config["FITBIT_SENSORS"]),
|
||||
expand("data/processed/{pid}/fitbit_heartrate_{day_segment}.csv",
|
||||
pid = config["PIDS"],
|
||||
day_segment = config["HEARTRATE"]["DAY_SEGMENTS"]),
|
||||
expand("data/processed/{pid}/fitbit_step_{day_segment}.csv",
|
||||
pid = config["PIDS"],
|
||||
day_segment = config["STEP"]["DAY_SEGMENTS"]),
|
||||
|
|
|
@ -72,7 +72,6 @@ BLUETOOTH:
|
|||
GOOGLE_ACTIVITY_RECOGNITION:
|
||||
DAY_SEGMENTS: *day_segments
|
||||
METRICS: ['count','mostcommonactivity','countuniqueactivities','activitychangecount','sumstationary','summobile','sumvehicle']
|
||||
|
||||
|
||||
BATTERY:
|
||||
DAY_SEGMENTS: *day_segments
|
||||
|
@ -92,6 +91,10 @@ ACCELEROMETER:
|
|||
DAY_SEGMENTS: *day_segments
|
||||
METRICS: ["maxmagnitude", "minmagnitude", "avgmagnitude", "medianmagnitude", "stdmagnitude", "ratioexertionalactivityepisodes", "sumexertionalactivityepisodes", "longestexertionalactivityepisode", "longestnonexertionalactivityepisode", "countexertionalactivityepisodes", "countnonexertionalactivityepisodes"]
|
||||
|
||||
# Heart rate feature settings. DAY_SEGMENTS drives which
# data/processed/{pid}/fitbit_heartrate_{day_segment}.csv targets are requested;
# METRICS is passed to the fitbit_heartrate_metrics rule as the list of features to compute.
# The "length*" metrics are the number of minutes spent in the matching heart rate zone.
HEARTRATE:
|
||||
DAY_SEGMENTS: *day_segments
|
||||
METRICS: ["maxhr", "minhr", "avghr", "medianhr", "modehr", "stdhr", "diffmaxmodehr", "diffminmodehr", "entropyhr", "lengthoutofrange", "lengthfatburn", "lengthcardio", "lengthpeak"]
|
||||
|
||||
STEP:
|
||||
DAY_SEGMENTS: *day_segments
|
||||
METRICS:
|
||||
|
|
|
@ -131,6 +131,17 @@ rule accelerometer_metrics:
|
|||
script:
|
||||
"../src/features/accelerometer_metrics.py"
|
||||
|
||||
# Builds the heart rate feature table for one participant ({pid}) and one day
# segment ({day_segment}) by running src/features/fitbit_heartrate_metrics.py
# on that participant's fitbit_heartrate_with_datetime.csv.
rule fitbit_heartrate_metrics:
|
||||
input:
|
||||
"data/raw/{pid}/fitbit_heartrate_with_datetime.csv",
|
||||
params:
|
||||
# The day_segment wildcard from the output path is forwarded to the script as a parameter.
day_segment = "{day_segment}",
|
||||
# Names of the features to compute, taken from the HEARTRATE section of the config.
metrics = config["HEARTRATE"]["METRICS"],
|
||||
output:
|
||||
"data/processed/{pid}/fitbit_heartrate_{day_segment}.csv"
|
||||
script:
|
||||
"../src/features/fitbit_heartrate_metrics.py"
|
||||
|
||||
rule fitbit_step_metrics:
|
||||
input:
|
||||
steps_data = "data/raw/{pid}/fitbit_steps_with_datetime.csv",
|
||||
|
|
|
@ -0,0 +1,49 @@
|
|||
import pandas as pd
|
||||
import numpy as np
|
||||
from scipy.stats import entropy
|
||||
import json
|
||||
|
||||
|
||||
# Build per-day heart rate features for one participant and one day segment
# from the Fitbit heart rate CSV, then write them to the Snakemake output path.
# `snakemake` is the object Snakemake injects into scripts run via `script:`.
heartrate_data = pd.read_csv(snakemake.input[0], parse_dates=["local_date_time", "local_date"])
day_segment = snakemake.params["day_segment"]
metrics = snakemake.params["metrics"]

column_prefix = "heartrate_" + day_segment + "_"
# Kept as a list (not a set) so the zone columns appear in the same order on
# every run; set iteration order for strings changes with hash randomization.
zone_metrics = ["lengthoutofrange", "lengthfatburn", "lengthcardio", "lengthpeak"]


def _first_mode(values):
    """Return a single mode for a group.

    ``Series.mode`` returns every tied value; storing all of them would put
    arrays in the feature column, so only the first reported mode is kept.
    Returns NaN when the group has no mode (e.g. all values are missing).
    """
    modes = values.mode()
    return modes.iloc[0] if not modes.empty else np.nan


# Default result: a header-only table, written when there is no data for this segment.
heartrate_features = pd.DataFrame(columns=["local_date"] + [column_prefix + x for x in metrics])
if not heartrate_data.empty:
    # Average number of rows recorded per minute, measured on the whole file
    # (before filtering by segment); used below to convert row counts to minutes.
    num_rows_per_minute = (
        heartrate_data.groupby(["local_date", "local_hour", "local_minute"])["device_id"].count().mean()
    )
    if day_segment != "daily":
        heartrate_data = heartrate_data[heartrate_data["local_day_segment"] == day_segment]

    if not heartrate_data.empty:
        hr_by_date = heartrate_data.groupby(["local_date"])["heartrate"]

        # Seed the index with every date that has data. Starting from an empty
        # frame would adopt the index of whichever column is assigned first, and
        # a zone column only covers the dates that have rows in that zone.
        heartrate_features = pd.DataFrame(index=hr_by_date.size().index)

        # get stats of heartrate
        if "maxhr" in metrics:
            heartrate_features[column_prefix + "maxhr"] = hr_by_date.max()
        if "minhr" in metrics:
            heartrate_features[column_prefix + "minhr"] = hr_by_date.min()
        if "avghr" in metrics:
            heartrate_features[column_prefix + "avghr"] = hr_by_date.mean()
        if "medianhr" in metrics:
            heartrate_features[column_prefix + "medianhr"] = hr_by_date.median()

        # The mode feeds three metrics; compute it once and only when needed.
        if any(name in metrics for name in ("modehr", "diffmaxmodehr", "diffminmodehr")):
            hr_mode = hr_by_date.agg(_first_mode)

        if "modehr" in metrics:
            heartrate_features[column_prefix + "modehr"] = hr_mode
        if "stdhr" in metrics:
            heartrate_features[column_prefix + "stdhr"] = hr_by_date.std()
        if "diffmaxmodehr" in metrics:
            heartrate_features[column_prefix + "diffmaxmodehr"] = hr_by_date.max() - hr_mode
        if "diffminmodehr" in metrics:
            heartrate_features[column_prefix + "diffminmodehr"] = hr_mode - hr_by_date.min()
        if "entropyhr" in metrics:
            heartrate_features[column_prefix + "entropyhr"] = hr_by_date.agg(entropy)

        # get number of minutes in each heart rate zone
        for feature_name in zone_metrics:
            if feature_name not in metrics:
                continue
            # The zone label is the metric name without its "length" prefix.
            zone_label = feature_name[len("length"):]
            heartrate_zone = heartrate_data[heartrate_data["heartrate_zone"] == zone_label]
            # Dates with no rows in this zone stay NaN after index alignment.
            heartrate_features[column_prefix + feature_name] = (
                heartrate_zone.groupby(["local_date"])["device_id"].count() / num_rows_per_minute
            )

        # Move local_date from the index into a regular column for the CSV.
        heartrate_features = heartrate_features.reset_index()

heartrate_features.to_csv(snakemake.output[0], index=False)
|
Loading…
Reference in New Issue