Add heartrate features

replace/69da230f52c04f435e816ceab27e269ce7de2e96
Meng Li 2020-02-07 11:35:15 -05:00
parent 6c5ace5611
commit 770764ec8a
4 changed files with 67 additions and 1 deletions

View File

@ -44,6 +44,9 @@ rule all:
expand("data/raw/{pid}/fitbit_{fitbit_sensor}_with_datetime.csv",
pid=config["PIDS"],
fitbit_sensor=config["FITBIT_SENSORS"]),
expand("data/processed/{pid}/fitbit_heartrate_{day_segment}.csv",
pid = config["PIDS"],
day_segment = config["HEARTRATE"]["DAY_SEGMENTS"]),
expand("data/processed/{pid}/fitbit_step_{day_segment}.csv",
pid = config["PIDS"],
day_segment = config["STEP"]["DAY_SEGMENTS"]),

View File

@ -72,7 +72,6 @@ BLUETOOTH:
GOOGLE_ACTIVITY_RECOGNITION:
DAY_SEGMENTS: *day_segments
METRICS: ['count','mostcommonactivity','countuniqueactivities','activitychangecount','sumstationary','summobile','sumvehicle']
BATTERY:
DAY_SEGMENTS: *day_segments
@ -92,6 +91,10 @@ ACCELEROMETER:
DAY_SEGMENTS: *day_segments
METRICS: ["maxmagnitude", "minmagnitude", "avgmagnitude", "medianmagnitude", "stdmagnitude", "ratioexertionalactivityepisodes", "sumexertionalactivityepisodes", "longestexertionalactivityepisode", "longestnonexertionalactivityepisode", "countexertionalactivityepisodes", "countnonexertionalactivityepisodes"]
# Settings for the Fitbit heartrate features; the Snakemake rules read them
# through config["HEARTRATE"].
HEARTRATE:
# Day segments to produce one feature file for (alias of the shared
# day_segments anchor defined elsewhere in this config).
DAY_SEGMENTS: *day_segments
# Names of the heartrate metrics the feature script should extract.
METRICS: ["maxhr", "minhr", "avghr", "medianhr", "modehr", "stdhr", "diffmaxmodehr", "diffminmodehr", "entropyhr", "lengthoutofrange", "lengthfatburn", "lengthcardio", "lengthpeak"]
STEP:
DAY_SEGMENTS: *day_segments
METRICS:

View File

@ -131,6 +131,17 @@ rule accelerometer_metrics:
script:
"../src/features/accelerometer_metrics.py"
# Build the heartrate feature file of one participant ({pid}) and one day
# segment ({day_segment}) from that participant's raw Fitbit heartrate CSV.
rule fitbit_heartrate_metrics:
input:
"data/raw/{pid}/fitbit_heartrate_with_datetime.csv",
params:
# The {day_segment} wildcard of the output path, passed on to the script.
day_segment = "{day_segment}",
# Metric names listed under HEARTRATE -> METRICS in the config.
metrics = config["HEARTRATE"]["METRICS"],
output:
"data/processed/{pid}/fitbit_heartrate_{day_segment}.csv"
script:
"../src/features/fitbit_heartrate_metrics.py"
rule fitbit_step_metrics:
input:
steps_data = "data/raw/{pid}/fitbit_steps_with_datetime.csv",

View File

@ -0,0 +1,49 @@
import pandas as pd
import numpy as np
from scipy.stats import entropy
import json
# Snakemake script: derive per-day heartrate features for one participant and
# one day segment, and write them to a CSV file.
#
# Read from the injected ``snakemake`` object:
#   input[0]              raw heartrate CSV (needs the columns local_date,
#                         local_date_time, local_hour, local_minute,
#                         local_day_segment, heartrate, heartrate_zone and
#                         device_id)
#   params["day_segment"] "daily" or the name of a day segment to keep
#   params["metrics"]     names of the metrics to compute
#   output[0]             destination CSV, one row per local_date
heartrate_data = pd.read_csv(snakemake.input[0], parse_dates=["local_date_time", "local_date"])
day_segment = snakemake.params["day_segment"]
metrics = snakemake.params["metrics"]

column_prefix = "heartrate_" + day_segment + "_"
# Metrics that report the time spent in a heartrate zone. The zone name is the
# metric name without its "length" prefix. A tuple keeps the output column
# order stable between runs (iterating a set of strings does not).
zone_prefix = "length"
zone_metrics = ("lengthoutofrange", "lengthfatburn", "lengthcardio", "lengthpeak")


def _first_mode(values):
    """Return the smallest mode of *values*, or NaN when there is none.

    ``Series.mode`` returns every tied value; aggregating with it directly
    yields non-scalar results on ties, which breaks the mode-based metrics.
    """
    modes = values.mode()
    return modes.iloc[0] if not modes.empty else np.nan


# Fallback result: an empty table that still carries the expected header, used
# when there is no data at all or none for the requested day segment.
heartrate_features = pd.DataFrame(columns=["local_date"] + [column_prefix + x for x in metrics])

if not heartrate_data.empty:
    # Average number of rows per (date, hour, minute), measured on the full
    # data before any day-segment filtering; it converts row counts into
    # minutes further down.
    num_rows_per_minute = (
        heartrate_data.groupby(["local_date", "local_hour", "local_minute"])["device_id"].count().mean()
    )
    if day_segment != "daily":
        heartrate_data = heartrate_data[heartrate_data["local_day_segment"] == day_segment]

    if not heartrate_data.empty:
        hr_by_date = heartrate_data.groupby("local_date")["heartrate"]
        # Start from the full set of dates so that every column aligns to the
        # same rows, whichever metric happens to be computed first.
        heartrate_features = pd.DataFrame(index=hr_by_date.size().index)

        # The mode feeds three metrics; compute it at most once.
        if {"modehr", "diffmaxmodehr", "diffminmodehr"} & set(metrics):
            mode_hr = hr_by_date.agg(_first_mode)

        # get stats of heartrate
        if "maxhr" in metrics:
            heartrate_features[column_prefix + "maxhr"] = hr_by_date.max()
        if "minhr" in metrics:
            heartrate_features[column_prefix + "minhr"] = hr_by_date.min()
        if "avghr" in metrics:
            heartrate_features[column_prefix + "avghr"] = hr_by_date.mean()
        if "medianhr" in metrics:
            heartrate_features[column_prefix + "medianhr"] = hr_by_date.median()
        if "modehr" in metrics:
            heartrate_features[column_prefix + "modehr"] = mode_hr
        if "stdhr" in metrics:
            heartrate_features[column_prefix + "stdhr"] = hr_by_date.std()
        if "diffmaxmodehr" in metrics:
            heartrate_features[column_prefix + "diffmaxmodehr"] = hr_by_date.max() - mode_hr
        if "diffminmodehr" in metrics:
            heartrate_features[column_prefix + "diffminmodehr"] = mode_hr - hr_by_date.min()
        if "entropyhr" in metrics:
            # scipy.stats.entropy normalises its input to sum to 1, so the raw
            # readings of a day are treated as an unnormalised distribution.
            # NOTE(review): confirm this is the intended entropy definition.
            heartrate_features[column_prefix + "entropyhr"] = hr_by_date.agg(entropy)

        # get number of minutes in each heart rate zone
        for feature_name in zone_metrics:
            if feature_name not in metrics:
                continue
            zone_name = feature_name[len(zone_prefix):]
            heartrate_zone = heartrate_data[heartrate_data["heartrate_zone"] == zone_name]
            rows_in_zone = heartrate_zone.groupby("local_date")["device_id"].count()
            # A date without any row in this zone spent 0 minutes in it.
            rows_in_zone = rows_in_zone.reindex(heartrate_features.index, fill_value=0)
            heartrate_features[column_prefix + feature_name] = rows_in_zone / num_rows_per_minute

        # Turn the local_date index back into a regular column for the CSV.
        heartrate_features = heartrate_features.reset_index()

heartrate_features.to_csv(snakemake.output[0], index=False)