Update heartrate features for segments
parent
d5931c75d8
commit
ed7585c2bf
|
@ -161,6 +161,8 @@ for provider in config["FITBIT_HEARTRATE"]["PROVIDERS"].keys():
|
|||
files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_{fitbit_data_type}_raw.csv", pid=config["PIDS"], fitbit_data_type=(["json"] if config["FITBIT_HEARTRATE"]["TABLE_FORMAT"] == "JSON" else ["summary", "intraday"])))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_{fitbit_data_type}_parsed.csv", pid=config["PIDS"], fitbit_data_type=["summary", "intraday"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_{fitbit_data_type}_parsed_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["summary", "intraday"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/fitbit_heartrate_features/fitbit_heartrate_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["FITBIT_HEARTRATE"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/fitbit_heartrate.csv", pid=config["PIDS"]))
|
||||
|
||||
for provider in config["FITBIT_STEPS"]["PROVIDERS"].keys():
|
||||
if config["FITBIT_STEPS"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
|
|
|
@ -265,8 +265,11 @@ FITBIT_HEARTRATE:
|
|||
PROVIDERS:
|
||||
RAPIDS:
|
||||
COMPUTE: False
|
||||
SUMMARY_FEATURES: ["restinghr"] # the accuracy of the calories features depends on the accuracy of the participant's Fitbit profile (e.g. height, weight); use these with care: ["caloriesoutofrange", "caloriesfatburn", "caloriescardio", "caloriespeak"]
|
||||
INTRADAY_FEATURES: ["maxhr", "minhr", "avghr", "medianhr", "modehr", "stdhr", "diffmaxmodehr", "diffminmodehr", "entropyhr", "minutesonoutofrangezone", "minutesonfatburnzone", "minutesoncardiozone", "minutesonpeakzone"]
|
||||
FEATURES:
|
||||
SUMMARY: ["restinghr"] # the accuracy of the calories features depends on the accuracy of the participant's Fitbit profile (e.g. height, weight); use these with care: ["caloriesoutofrange", "caloriesfatburn", "caloriescardio", "caloriespeak"]
|
||||
INTRADAY: ["maxhr", "minhr", "avghr", "medianhr", "modehr", "stdhr", "diffmaxmodehr", "diffminmodehr", "entropyhr", "minutesonoutofrangezone", "minutesonfatburnzone", "minutesoncardiozone", "minutesonpeakzone"]
|
||||
SRC_FOLDER: "rapids" # inside src/features/fitbit_heartrate
|
||||
SRC_LANGUAGE: "python"
|
||||
|
||||
|
||||
FITBIT_STEPS:
|
||||
|
|
|
@ -216,6 +216,19 @@ rule phone_wifi_visible_r_features:
|
|||
script:
|
||||
"../src/features/entry.R"
|
||||
|
||||
rule fitbit_heartrate_python_features:
    # Runs the shared Python feature entry script for one participant ({pid})
    # and one Fitbit heart rate provider ({provider_key}).
    input:
        # expand() lists the "summary" file first and the "intraday" file second
        sensor_data = expand("data/raw/{{pid}}/fitbit_heartrate_{fitbit_data_type}_parsed_with_datetime.csv", fitbit_data_type=["summary", "intraday"]),
        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
    params:
        # provider settings are read from the config under the upper-cased provider_key wildcard
        provider = lambda wildcards: config["FITBIT_HEARTRATE"]["PROVIDERS"][wildcards.provider_key.upper()],
        provider_key = "{provider_key}",
        sensor_key = "fitbit_heartrate"
    output:
        "data/interim/{pid}/fitbit_heartrate_features/fitbit_heartrate_python_{provider_key}.csv"
    script:
        "../src/features/entry.py"
|
||||
|
||||
# rule fitbit_heartrate_features:
|
||||
# input:
|
||||
# heartrate_summary_data = "data/raw/{pid}/fitbit_heartrate_summary_with_datetime.csv",
|
||||
|
|
|
@ -1,76 +0,0 @@
|
|||
import pandas as pd
|
||||
from scipy.stats import entropy
|
||||
|
||||
def extractHRFeaturesFromSummaryData(heartrate_summary_data, summary_features):
    """Copy the requested daily summary columns into a new feature table.

    Parameters
    ----------
    heartrate_summary_data : pandas.DataFrame
        Summary table holding the ``heartrate_daily_<feature>`` columns.
    summary_features : list of str
        Names of the summary features to copy.

    Returns
    -------
    pandas.DataFrame
        The selected columns, with the index of the input moved into a
        regular column by ``reset_index``.
    """
    # calories features might be inaccurate: they depend on users' fitbit profile (weight, height, etc.)
    known_features = ("restinghr", "caloriesoutofrange", "caloriesfatburn", "caloriescardio", "caloriespeak")

    selected = pd.DataFrame()
    for name in known_features:
        if name in summary_features:
            column = "heartrate_daily_" + name
            selected[column] = heartrate_summary_data[column]

    selected.reset_index(inplace=True)
    return selected
|
||||
|
||||
def extractHRFeaturesFromIntradayData(heartrate_intraday_data, features, day_segment):
    """Compute per-date heart rate statistics and minutes spent in each zone.

    Parameters
    ----------
    heartrate_intraday_data : pandas.DataFrame
        Intraday rows. The columns read here are ``local_date``,
        ``local_hour``, ``local_minute``, ``device_id``, ``heartrate``,
        ``heartrate_zone`` and, for non-daily segments, ``local_day_segment``.
    features : list of str
        Names of the intraday features to compute.
    day_segment : str
        ``"daily"`` keeps every row; any other value keeps only the rows whose
        ``local_day_segment`` equals it.

    Returns
    -------
    pandas.DataFrame
        One row per ``local_date`` with a ``heartrate_<day_segment>_<feature>``
        column per computed feature. Statistics come first in a fixed order,
        followed by the zone features in a fixed order. When there is no data
        (before or after segment filtering) an empty frame with the expected
        column names is returned.
    """
    prefix = "heartrate_" + day_segment + "_"
    # Fixed order so the output columns do not depend on set iteration order.
    zone_feature_names = ("minutesonoutofrangezone", "minutesonfatburnzone", "minutesoncardiozone", "minutesonpeakzone")

    empty_features = pd.DataFrame(columns=["local_date"] + [prefix + x for x in features])
    if heartrate_intraday_data.empty:
        return empty_features

    # Average number of rows recorded per minute, measured on the unfiltered
    # data; used below to convert row counts into minutes.
    num_rows_per_minute = heartrate_intraday_data.groupby(["local_date", "local_hour", "local_minute"])["device_id"].count().mean()

    if day_segment != "daily":
        heartrate_intraday_data = heartrate_intraday_data[heartrate_intraday_data["local_day_segment"] == day_segment]
    if heartrate_intraday_data.empty:
        return empty_features

    requested = set(features)
    heartrate_by_date = heartrate_intraday_data.groupby("local_date")["heartrate"]
    # Start from the full list of dates so that zone features report 0 for
    # dates without rows in that zone, even when no statistic is requested.
    heartrate_intraday_features = pd.DataFrame(index=heartrate_by_date.size().index)

    # The mode is shared by three features; compute it at most once.
    if requested & {"modehr", "diffmaxmodehr", "diffminmodehr"}:
        mode_hr = heartrate_by_date.agg(lambda x: pd.Series.mode(x)[0])

    # get stats of heartrate
    if "maxhr" in requested:
        heartrate_intraday_features[prefix + "maxhr"] = heartrate_by_date.max()
    if "minhr" in requested:
        heartrate_intraday_features[prefix + "minhr"] = heartrate_by_date.min()
    if "avghr" in requested:
        heartrate_intraday_features[prefix + "avghr"] = heartrate_by_date.mean()
    if "medianhr" in requested:
        heartrate_intraday_features[prefix + "medianhr"] = heartrate_by_date.median()
    if "modehr" in requested:
        heartrate_intraday_features[prefix + "modehr"] = mode_hr
    if "stdhr" in requested:
        heartrate_intraday_features[prefix + "stdhr"] = heartrate_by_date.std()
    if "diffmaxmodehr" in requested:
        heartrate_intraday_features[prefix + "diffmaxmodehr"] = heartrate_by_date.max() - mode_hr
    if "diffminmodehr" in requested:
        heartrate_intraday_features[prefix + "diffminmodehr"] = mode_hr - heartrate_by_date.min()
    if "entropyhr" in requested:
        heartrate_intraday_features[prefix + "entropyhr"] = heartrate_by_date.agg(entropy)

    # get number of minutes in each heart rate zone
    for feature_name in zone_feature_names:
        if feature_name not in requested:
            continue
        # "minuteson<zone>zone" -> "<zone>", the value stored in heartrate_zone
        zone = feature_name[len("minuteson"):-len("zone")]
        zone_rows = heartrate_intraday_data[heartrate_intraday_data["heartrate_zone"] == zone]
        minutes_in_zone = zone_rows.groupby("local_date")["device_id"].count() / num_rows_per_minute
        heartrate_intraday_features[prefix + feature_name] = minutes_in_zone.reindex(heartrate_intraday_features.index, fill_value=0)

    return heartrate_intraday_features.reset_index()
|
||||
|
||||
def base_fitbit_heartrate_features(heartrate_summary_data, heartrate_intraday_data, day_segment, requested_summary_features, requested_intraday_features):
    """Compute the intraday features and, for the daily segment, merge in summary features.

    Parameters
    ----------
    heartrate_summary_data : pandas.DataFrame
        Summary table passed to ``extractHRFeaturesFromSummaryData``.
    heartrate_intraday_data : pandas.DataFrame
        Intraday table passed to ``extractHRFeaturesFromIntradayData``.
    day_segment : str
        Segment to compute; summary features are only added for ``"daily"``.
    requested_summary_features, requested_intraday_features : list of str
        Feature names requested by the caller. Names this function cannot
        compute are ignored.

    Returns
    -------
    pandas.DataFrame
        Intraday features, outer-merged on ``local_date`` with the summary
        features when the summary data is not empty, the segment is
        ``"daily"`` and at least one summary feature was requested.
    """
    # name of the features this function can compute
    base_summary_features_names = ["restinghr", "caloriesoutofrange", "caloriesfatburn", "caloriescardio", "caloriespeak"]
    base_intraday_features_names = ["maxhr", "minhr", "avghr", "medianhr", "modehr", "stdhr", "diffmaxmodehr", "diffminmodehr", "entropyhr", "minutesonoutofrangezone", "minutesonfatburnzone", "minutesoncardiozone", "minutesonpeakzone"]

    # the subset of requested features this function can compute, kept in the
    # order of the lists above so the result does not depend on set ordering
    requested_summary = set(requested_summary_features)
    requested_intraday = set(requested_intraday_features)
    summary_features_to_compute = [name for name in base_summary_features_names if name in requested_summary]
    intraday_features_to_compute = [name for name in base_intraday_features_names if name in requested_intraday]

    heartrate_intraday_features = extractHRFeaturesFromIntradayData(heartrate_intraday_data, intraday_features_to_compute, day_segment)

    if heartrate_summary_data.empty or day_segment != "daily" or not summary_features_to_compute:
        return heartrate_intraday_features

    heartrate_summary_features = extractHRFeaturesFromSummaryData(heartrate_summary_data, summary_features_to_compute)
    return heartrate_intraday_features.merge(heartrate_summary_features, on=["local_date"], how="outer")
|
|
@ -0,0 +1,94 @@
|
|||
import pandas as pd
|
||||
from scipy.stats import entropy
|
||||
|
||||
def extractHRFeaturesFromSummaryData(heartrate_summary_data, summary_features):
    """Copy the requested summary columns into a feature table keyed by segment.

    Parameters
    ----------
    heartrate_summary_data : pandas.DataFrame
        Summary table with a ``local_segment`` column and the
        ``heartrate_daily_<feature>`` columns. It is not modified.
    summary_features : list of str
        Names of the summary features to copy.

    Returns
    -------
    pandas.DataFrame
        A ``local_segment`` column followed by one
        ``heartrate_rapids_<feature>`` column per requested feature.
    """
    # Index a copy instead of using set_index(inplace=True): the caller's
    # frame (possibly a filtered slice) must keep its local_segment column.
    indexed_summary = heartrate_summary_data.set_index("local_segment")

    # calories features might be inaccurate: they depend on users' fitbit profile (weight, height, etc.)
    known_features = ("restinghr", "caloriesoutofrange", "caloriesfatburn", "caloriescardio", "caloriespeak")

    heartrate_summary_features = pd.DataFrame(index=indexed_summary.index)
    for name in known_features:
        if name in summary_features:
            heartrate_summary_features["heartrate_rapids_" + name] = indexed_summary["heartrate_daily_" + name]

    return heartrate_summary_features.reset_index()
|
||||
|
||||
def extractHRFeaturesFromIntradayData(heartrate_intraday_data, features, day_segment, filter_data_by_segment):
    """Compute per-segment heart rate statistics and minutes spent in each zone.

    Parameters
    ----------
    heartrate_intraday_data : pandas.DataFrame
        Intraday rows. The columns read here are ``local_date``,
        ``local_hour``, ``local_minute``, ``device_id``, ``heartrate`` and
        ``heartrate_zone``.
    features : list of str
        Names of the intraday features to compute.
    day_segment : str
        Segment label, forwarded unchanged to ``filter_data_by_segment``.
    filter_data_by_segment : callable
        Called as ``filter_data_by_segment(data, day_segment)``. Its result
        must contain a ``local_segment`` column, which is used as the
        grouping key.

    Returns
    -------
    pandas.DataFrame
        One row per ``local_segment`` with a ``heartrate_rapids_<feature>``
        column per computed feature. Statistics come first in a fixed order,
        followed by the zone features in a fixed order. When there is no data
        (before or after segment filtering) an empty frame with the expected
        column names is returned.
    """
    # Fixed order so the output columns do not depend on set iteration order.
    zone_feature_names = ("minutesonoutofrangezone", "minutesonfatburnzone", "minutesoncardiozone", "minutesonpeakzone")

    empty_features = pd.DataFrame(columns=["local_segment"] + ["heartrate_rapids_" + x for x in features])
    if heartrate_intraday_data.empty:
        return empty_features

    # Average number of rows recorded per minute, measured on the unfiltered
    # data; used below to convert row counts into minutes.
    num_rows_per_minute = heartrate_intraday_data.groupby(["local_date", "local_hour", "local_minute"])["device_id"].count().mean()

    heartrate_intraday_data = filter_data_by_segment(heartrate_intraday_data, day_segment)
    if heartrate_intraday_data.empty:
        return empty_features

    requested = set(features)
    heartrate_by_segment = heartrate_intraday_data.groupby("local_segment")["heartrate"]
    # Start from the full list of segments so that zone features report 0 for
    # segments without rows in that zone, even when no statistic is requested.
    heartrate_intraday_features = pd.DataFrame(index=heartrate_by_segment.size().index)

    # The mode is shared by three features; compute it at most once.
    if requested & {"modehr", "diffmaxmodehr", "diffminmodehr"}:
        mode_hr = heartrate_by_segment.agg(lambda x: pd.Series.mode(x)[0])

    # get stats of heartrate
    if "maxhr" in requested:
        heartrate_intraday_features["heartrate_rapids_maxhr"] = heartrate_by_segment.max()
    if "minhr" in requested:
        heartrate_intraday_features["heartrate_rapids_minhr"] = heartrate_by_segment.min()
    if "avghr" in requested:
        heartrate_intraday_features["heartrate_rapids_avghr"] = heartrate_by_segment.mean()
    if "medianhr" in requested:
        heartrate_intraday_features["heartrate_rapids_medianhr"] = heartrate_by_segment.median()
    if "modehr" in requested:
        heartrate_intraday_features["heartrate_rapids_modehr"] = mode_hr
    if "stdhr" in requested:
        heartrate_intraday_features["heartrate_rapids_stdhr"] = heartrate_by_segment.std()
    if "diffmaxmodehr" in requested:
        heartrate_intraday_features["heartrate_rapids_diffmaxmodehr"] = heartrate_by_segment.max() - mode_hr
    if "diffminmodehr" in requested:
        heartrate_intraday_features["heartrate_rapids_diffminmodehr"] = mode_hr - heartrate_by_segment.min()
    if "entropyhr" in requested:
        heartrate_intraday_features["heartrate_rapids_entropyhr"] = heartrate_by_segment.agg(entropy)

    # get number of minutes in each heart rate zone
    for feature_name in zone_feature_names:
        if feature_name not in requested:
            continue
        # "minuteson<zone>zone" -> "<zone>", the value stored in heartrate_zone
        zone = feature_name[len("minuteson"):-len("zone")]
        zone_rows = heartrate_intraday_data[heartrate_intraday_data["heartrate_zone"] == zone]
        minutes_in_zone = zone_rows.groupby("local_segment")["device_id"].count() / num_rows_per_minute
        heartrate_intraday_features["heartrate_rapids_" + feature_name] = minutes_in_zone.reindex(heartrate_intraday_features.index, fill_value=0)

    return heartrate_intraday_features.reset_index()
|
||||
|
||||
|
||||
def rapids_features(sensor_data_files, day_segment, provider, filter_data_by_segment, *args, **kwargs):
    """Compute the RAPIDS Fitbit heart rate features for one day segment.

    Parameters
    ----------
    sensor_data_files : mapping
        ``sensor_data_files["sensor_data"]`` holds two CSV paths: the summary
        data at position 0 and the intraday data at position 1.
    day_segment : str
        Segment to compute. Summary features are only added when this is
        ``"daily"``.
    provider : mapping
        Provider settings; ``provider["FEATURES"]["SUMMARY"]`` and
        ``provider["FEATURES"]["INTRADAY"]`` list the requested feature names.
    filter_data_by_segment : callable
        Called as ``filter_data_by_segment(data, day_segment)`` to keep the
        rows of a segment.
    *args, **kwargs
        Accepted and ignored.

    Returns
    -------
    pandas.DataFrame
        Intraday features, outer-merged on ``local_segment`` with the summary
        features when the summary data is not empty, the segment is
        ``"daily"`` and at least one summary feature was requested.
    """
    heartrate_summary_data = pd.read_csv(sensor_data_files["sensor_data"][0])
    heartrate_intraday_data = pd.read_csv(sensor_data_files["sensor_data"][1])

    requested_summary_features = set(provider["FEATURES"]["SUMMARY"])
    requested_intraday_features = set(provider["FEATURES"]["INTRADAY"])

    # name of the features this function can compute
    base_summary_features_names = ["restinghr", "caloriesoutofrange", "caloriesfatburn", "caloriescardio", "caloriespeak"]
    base_intraday_features_names = ["maxhr", "minhr", "avghr", "medianhr", "modehr", "stdhr", "diffmaxmodehr", "diffminmodehr", "entropyhr", "minutesonoutofrangezone", "minutesonfatburnzone", "minutesoncardiozone", "minutesonpeakzone"]

    # the subset of requested features this function can compute, kept in the
    # order of the lists above so the result does not depend on set ordering
    summary_features_to_compute = [name for name in base_summary_features_names if name in requested_summary_features]
    intraday_features_to_compute = [name for name in base_intraday_features_names if name in requested_intraday_features]

    heartrate_intraday_features = extractHRFeaturesFromIntradayData(heartrate_intraday_data, intraday_features_to_compute, day_segment, filter_data_by_segment)

    if heartrate_summary_data.empty or day_segment != "daily" or not summary_features_to_compute:
        return heartrate_intraday_features

    # filter by segment and skipping any non-daily segment
    heartrate_summary_data = filter_data_by_segment(heartrate_summary_data, "daily")

    # keep only segments labelled "daily#<date> 00:00:00,<date> 23:59:59"
    datetime_start_regex = "[0-9]{4}[\\-|\\/][0-9]{2}[\\-|\\/][0-9]{2} 00:00:00"
    datetime_end_regex = "[0-9]{4}[\\-|\\/][0-9]{2}[\\-|\\/][0-9]{2} 23:59:59"
    segment_regex = "daily#{},{}".format(datetime_start_regex, datetime_end_regex)
    heartrate_summary_data = heartrate_summary_data[heartrate_summary_data["local_segment"].str.match(segment_regex)]

    # extract daily features from summary data
    heartrate_summary_features = extractHRFeaturesFromSummaryData(heartrate_summary_data, summary_features_to_compute)

    # merge summary features and intraday features
    return heartrate_intraday_features.merge(heartrate_summary_features, on=["local_segment"], how="outer")
|
|
@ -1,16 +0,0 @@
|
|||
import pandas as pd
|
||||
from fitbit_heartrate.fitbit_heartrate_base import base_fitbit_heartrate_features
|
||||
|
||||
# Load the summary table (indexed by local_date) and the intraday samples.
heartrate_summary_data = pd.read_csv(snakemake.input["heartrate_summary_data"], index_col=["local_date"], parse_dates=["local_date"])
heartrate_intraday_data = pd.read_csv(snakemake.input["heartrate_intraday_data"], parse_dates=["local_date_time", "local_date"])

# Settings supplied by the Snakemake rule.
day_segment = snakemake.params["day_segment"]
requested_summary_features = snakemake.params["summary_features"]
requested_intraday_features = snakemake.params["intraday_features"]

# Outer-merge the computed features onto an empty frame keyed by local_date.
base_features = base_fitbit_heartrate_features(heartrate_summary_data, heartrate_intraday_data, day_segment, requested_summary_features, requested_intraday_features)
heartrate_features = pd.DataFrame(columns=["local_date"]).merge(base_features, on="local_date", how="outer")

# Summary features are only expected in the output for the daily segment.
if day_segment == "daily":
    requested_features = requested_summary_features + requested_intraday_features
else:
    requested_features = requested_intraday_features

# Sanity check: one column per requested feature plus the local_date column.
assert len(requested_features) + 1 == heartrate_features.shape[1], "The number of features in the output dataframe (=" + str(heartrate_features.shape[1]) + ") does not match the expected value (=" + str(len(requested_features)) + " + 1). Verify your fitbit heartrate feature extraction functions"

heartrate_features.to_csv(snakemake.output[0], index=False)
|
Loading…
Reference in New Issue