diff --git a/config.yaml b/config.yaml index b4c2d1ae..543f8adc 100644 --- a/config.yaml +++ b/config.yaml @@ -113,7 +113,7 @@ APPLICATIONS_FOREGROUND: HEARTRATE: DAY_SEGMENTS: *day_segments - METRICS: ["maxhr", "minhr", "avghr", "medianhr", "modehr", "stdhr", "diffmaxmodehr", "diffminmodehr", "entropyhr", "lengthoutofrange", "lengthfatburn", "lengthcardio", "lengthpeak"] + FEATURES: ["maxhr", "minhr", "avghr", "medianhr", "modehr", "stdhr", "diffmaxmodehr", "diffminmodehr", "entropyhr", "lengthoutofrange", "lengthfatburn", "lengthcardio", "lengthpeak"] STEP: DAY_SEGMENTS: *day_segments diff --git a/docs/features/extracted.rst b/docs/features/extracted.rst index 001c0404..e4bb8d65 100644 --- a/docs/features/extracted.rst +++ b/docs/features/extracted.rst @@ -985,7 +985,7 @@ See `Fitbit: Heart Rate Config Code`_ | ``pid=config["PIDS"],`` | ``fitbit_sensor=config["FITBIT_SENSORS"]),`` -- Extract Sensor Metrics: +- Extract Sensor Features: | ``expand("data/processed/{pid}/fitbit_heartrate_{day_segment}.csv",`` | ``pid=config["PIDS"],`` @@ -1001,9 +1001,9 @@ See `Fitbit: Heart Rate Config Code`_ - **Script:** ``src/data/fitbit_readable_datetime.py`` - See the fitbit_readable_datetime.py_ script. -- **Rule:** ``rules/features.snakefile/fitbit_heartrate_metrics`` - See the fitbit_heartrate_metrics_ rule. +- **Rule:** ``rules/features.snakefile/fitbit_heartrate_features`` - See the fitbit_heartrate_features_ rule. - - **Script:** ``src/features/fitbit_heartrate_metrics.py`` - See the fitbit_heartrate_metrics.py_ script. + - **Script:** ``src/features/fitbit_heartrate_features.py`` - See the fitbit_heartrate_features.py_ script. .. _fitbit-heart-rate-parameters: @@ -1014,15 +1014,15 @@ See `Fitbit: Heart Rate Config Code`_ Name Description ============ =================== day_segment The particular ``day_segments`` that will be analyzed. 
The available options are ``daily``, ``morning``, ``afternoon``, ``evening``, ``night`` -metrics The different measures that can be retrieved from the Fitbit: Heart Rate dataset. - See :ref:`Available Fitbit: Heart Rate Metrics <fitbit-heart-rate-available-metrics>` Table below +features The different measures that can be retrieved from the Fitbit: Heart Rate dataset. + See :ref:`Available Fitbit: Heart Rate Features <fitbit-heart-rate-available-features>` Table below ============ =================== -.. _fitbit-heart-rate-available-metrics: +.. _fitbit-heart-rate-available-features: -**Available Fitbit: Heart Rate Metrics** +**Available Fitbit: Heart Rate Features** -The following table shows a list of the available metrics for the Fitbit: Heart Rate dataset. +The following table shows a list of the available features for the Fitbit: Heart Rate dataset. ================== =========== ============= Name Units Description @@ -1192,8 +1192,8 @@ stddurationactivebout minutes Std duration active bout: The standard .. _`Fitbit: Heart Rate Config Code`: https://github.com/carissalow/rapids/blob/765bb462636d5029a05f54d4c558487e3786b90b/config.yaml#L113 .. _fitbit_with_datetime: https://github.com/carissalow/rapids/blob/765bb462636d5029a05f54d4c558487e3786b90b/rules/preprocessing.snakefile#L94 .. _fitbit_readable_datetime.py: https://github.com/carissalow/rapids/blob/master/src/data/fitbit_readable_datetime.py -.. _fitbit_heartrate_metrics: https://github.com/carissalow/rapids/blob/765bb462636d5029a05f54d4c558487e3786b90b/rules/features.snakefile#L151 -.. _fitbit_heartrate_metrics.py: https://github.com/carissalow/rapids/blob/master/src/features/fitbit_heartrate_metrics.py +.. _fitbit_heartrate_features: https://github.com/carissalow/rapids/blob/765bb462636d5029a05f54d4c558487e3786b90b/rules/features.snakefile#L151 +.. _fitbit_heartrate_features.py: https://github.com/carissalow/rapids/blob/master/src/features/fitbit_heartrate_features.py .. 
_`Fitbit: Steps Config Code`: https://github.com/carissalow/rapids/blob/765bb462636d5029a05f54d4c558487e3786b90b/config.yaml#L117 .. _fitbit_step_features: https://github.com/carissalow/rapids/blob/765bb462636d5029a05f54d4c558487e3786b90b/rules/features.snakefile#L162 .. _fitbit_step_features.py: https://github.com/carissalow/rapids/blob/master/src/features/fitbit_step_features.py diff --git a/rules/features.snakefile b/rules/features.snakefile index e7e50a4f..d04bd819 100644 --- a/rules/features.snakefile +++ b/rules/features.snakefile @@ -149,16 +149,16 @@ rule applications_foreground_features: script: "../src/features/applications_foreground_features.py" -rule fitbit_heartrate_metrics: +rule fitbit_heartrate_features: input: "data/raw/{pid}/fitbit_heartrate_with_datetime.csv", params: day_segment = "{day_segment}", - metrics = config["HEARTRATE"]["METRICS"], + features = config["HEARTRATE"]["FEATURES"], output: "data/processed/{pid}/fitbit_heartrate_{day_segment}.csv" script: - "../src/features/fitbit_heartrate_metrics.py" + "../src/features/fitbit_heartrate_features.py" rule fitbit_step_features: input: diff --git a/src/features/fitbit_heartrate_metrics.py b/src/features/fitbit_heartrate_features.py similarity index 85% rename from src/features/fitbit_heartrate_metrics.py rename to src/features/fitbit_heartrate_features.py index a5848106..652bae6d 100644 --- a/src/features/fitbit_heartrate_metrics.py +++ b/src/features/fitbit_heartrate_features.py @@ -6,10 +6,10 @@ import json heartrate_data = pd.read_csv(snakemake.input[0], parse_dates=["local_date_time", "local_date"]) day_segment = snakemake.params["day_segment"] -metrics = snakemake.params["metrics"] +features = snakemake.params["features"] -heartrate_features = pd.DataFrame(columns=["local_date"] + ["heartrate_" + day_segment + "_" + x for x in metrics]) +heartrate_features = pd.DataFrame(columns=["local_date"] + ["heartrate_" + day_segment + "_" + x for x in features]) if not heartrate_data.empty: 
device_id = heartrate_data["device_id"][0] num_rows_per_minute = heartrate_data.groupby(["local_date", "local_hour", "local_minute"]).count().mean()["device_id"] @@ -20,27 +20,27 @@ if not heartrate_data.empty: heartrate_features = pd.DataFrame() # get stats of heartrate - if "maxhr" in metrics: + if "maxhr" in features: heartrate_features["heartrate_" + day_segment + "_maxhr"] = heartrate_data.groupby(["local_date"])["heartrate"].max() - if "minhr" in metrics: + if "minhr" in features: heartrate_features["heartrate_" + day_segment + "_minhr"] = heartrate_data.groupby(["local_date"])["heartrate"].min() - if "avghr" in metrics: + if "avghr" in features: heartrate_features["heartrate_" + day_segment + "_avghr"] = heartrate_data.groupby(["local_date"])["heartrate"].mean() - if "medianhr" in metrics: + if "medianhr" in features: heartrate_features["heartrate_" + day_segment + "_medianhr"] = heartrate_data.groupby(["local_date"])["heartrate"].median() - if "modehr" in metrics: + if "modehr" in features: heartrate_features["heartrate_" + day_segment + "_modehr"] = heartrate_data.groupby(["local_date"])["heartrate"].agg(lambda x: pd.Series.mode(x)[0]) - if "stdhr" in metrics: + if "stdhr" in features: heartrate_features["heartrate_" + day_segment + "_stdhr"] = heartrate_data.groupby(["local_date"])["heartrate"].std() - if "diffmaxmodehr" in metrics: + if "diffmaxmodehr" in features: heartrate_features["heartrate_" + day_segment + "_diffmaxmodehr"] = heartrate_data.groupby(["local_date"])["heartrate"].max() - heartrate_data.groupby(["local_date"])["heartrate"].agg(lambda x: pd.Series.mode(x)[0]) - if "diffminmodehr" in metrics: + if "diffminmodehr" in features: heartrate_features["heartrate_" + day_segment + "_diffminmodehr"] = heartrate_data.groupby(["local_date"])["heartrate"].agg(lambda x: pd.Series.mode(x)[0]) - heartrate_data.groupby(["local_date"])["heartrate"].min() - if "entropyhr" in metrics: + if "entropyhr" in features: heartrate_features["heartrate_" + 
day_segment + "_entropyhr"] = heartrate_data.groupby(["local_date"])["heartrate"].agg(entropy) # get number of minutes in each heart rate zone - for feature_name in list(set(["lengthoutofrange", "lengthfatburn", "lengthcardio", "lengthpeak"]) & set(metrics)): + for feature_name in list(set(["lengthoutofrange", "lengthfatburn", "lengthcardio", "lengthpeak"]) & set(features)): heartrate_zone = heartrate_data[heartrate_data["heartrate_zone"] == feature_name[6:]] heartrate_features["heartrate_" + day_segment + "_" + feature_name] = heartrate_zone.groupby(["local_date"])["device_id"].count() / num_rows_per_minute heartrate_features.fillna(value={"heartrate_" + day_segment + "_" + feature_name: 0}, inplace=True)