From 1bdb52dd6516e9fa9a1504e27ff82f7f4939b398 Mon Sep 17 00:00:00 2001 From: JulioV Date: Wed, 24 Jun 2020 13:33:58 -0400 Subject: [PATCH] Add functionality to exclude step data based on sleep --- Snakefile | 4 ++- config.yaml | 6 ++++ docs/features/extracted.rst | 5 +++ rules/features.snakefile | 15 +++++++-- src/features/fitbit_step_features.py | 47 +++++++++++++++++++++++++++- 5 files changed, 73 insertions(+), 4 deletions(-) diff --git a/Snakefile b/Snakefile index 1df92c16..521707a5 100644 --- a/Snakefile +++ b/Snakefile @@ -83,13 +83,15 @@ if config["HEARTRATE"]["COMPUTE"]: files_to_compute.extend(expand("data/processed/{pid}/fitbit_heartrate_{day_segment}.csv", pid = config["PIDS"], day_segment = config["HEARTRATE"]["DAY_SEGMENTS"])) if config["STEP"]["COMPUTE"]: + if config["STEP"]["EXCLUDE_SLEEP"]["EXCLUDE"] == True and config["STEP"]["EXCLUDE_SLEEP"]["TYPE"] == "FITBIT_BASED": + files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_{fitbit_data_type}_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["summary"])) files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["STEP"]["DB_TABLE"])) files_to_compute.extend(expand("data/raw/{pid}/fitbit_step_{fitbit_data_type}_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["intraday"])) files_to_compute.extend(expand("data/processed/{pid}/fitbit_step_{day_segment}.csv", pid = config["PIDS"], day_segment = config["STEP"]["DAY_SEGMENTS"])) if config["SLEEP"]["COMPUTE"]: files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["SLEEP"]["DB_TABLE"])) - files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_{fitbit_data_type}_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["intraday"])) + files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_{fitbit_data_type}_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["intraday", "summary"])) 
files_to_compute.extend(expand("data/processed/{pid}/fitbit_sleep_{day_segment}.csv", pid = config["PIDS"], day_segment = config["SLEEP"]["DAY_SEGMENTS"])) if config["CONVERSATION"]["COMPUTE"]: diff --git a/config.yaml b/config.yaml index 93c7acf5..1c53848c 100644 --- a/config.yaml +++ b/config.yaml @@ -146,6 +146,12 @@ STEP: COMPUTE: False DB_TABLE: fitbit_data DAY_SEGMENTS: *day_segments + EXCLUDE_SLEEP: + EXCLUDE: True + TYPE: FIXED # FIXED OR FITBIT_BASED (CONFIGURE FITBIT's SLEEP DB_TABLE) + FIXED: + START: "23:00" + END: "07:00" FEATURES: ALL_STEPS: ["sumallsteps", "maxallsteps", "minallsteps", "avgallsteps", "stdallsteps"] SEDENTARY_BOUT: ["countsedentarybout", "maxdurationsedentarybout", "mindurationsedentarybout", "avgdurationsedentarybout", "stddurationsedentarybout", "sumdurationsedentarybout"] diff --git a/docs/features/extracted.rst b/docs/features/extracted.rst index 87fc5f24..01a808e8 100644 --- a/docs/features/extracted.rst +++ b/docs/features/extracted.rst @@ -919,6 +919,11 @@ Name Description day_segment The particular ``day_segment`` that will be analyzed. The available options are ``daily``, ``morning``, ``afternoon``, ``evening``, ``night`` features The features that can be computed. See :ref:`Available Fitbit: Steps Features ` Table below threshold_active_bout Every minute with Fitbit step data wil be labelled as ``sedentary`` if its step count is below this threshold, otherwise, ``active``. +include_zero_step_rows Whether or not to include day segments with a 0 step count +exclude_sleep Whether or not to exclude step rows that happen during sleep +exclude_sleep_type If ``exclude_sleep`` is True, then you can choose between ``FIXED`` or ``FITBIT_BASED``. ``FIXED`` will exclude all step rows that happen between a start and end time (see below). 
``FITBIT_BASED`` will exclude step rows that happen during main sleep segments as measured by the Fitbit device (``config[SLEEP][DB_TABLE]`` should be a valid table in your database, it usually is the same table that contains your STEP data) +exclude_sleep_fixed_start Start time of the fixed sleep period to exclude. Only relevant if ``exclude_sleep`` is True and ``exclude_sleep_type`` is ``FIXED`` +exclude_sleep_fixed_end End time of the fixed sleep period to exclude. Only relevant if ``exclude_sleep`` is True and ``exclude_sleep_type`` is ``FIXED`` ======================= =================== .. _fitbit-steps-available-features: diff --git a/rules/features.snakefile b/rules/features.snakefile index 6fb9cab3..2003fa73 100644 --- a/rules/features.snakefile +++ b/rules/features.snakefile @@ -26,6 +26,12 @@ def optional_location_input(wildcards): else: return expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["BARNETT_LOCATION"]["DB_TABLE"]) +def optional_steps_sleep_input(wildcards): + if config["STEP"]["EXCLUDE_SLEEP"]["EXCLUDE"] == True and config["STEP"]["EXCLUDE_SLEEP"]["TYPE"] == "FITBIT_BASED": + return "data/raw/{pid}/fitbit_sleep_summary_with_datetime.csv" + else: + return [] + rule messages_features: input: expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["MESSAGES"]["DB_TABLE"]) @@ -226,14 +232,19 @@ rule fitbit_heartrate_features: rule fitbit_step_features: input: - step_data = "data/raw/{pid}/fitbit_step_intraday_with_datetime.csv" + step_data = "data/raw/{pid}/fitbit_step_intraday_with_datetime.csv", + sleep_data = optional_steps_sleep_input params: day_segment = "{day_segment}", features_all_steps = config["STEP"]["FEATURES"]["ALL_STEPS"], features_sedentary_bout = config["STEP"]["FEATURES"]["SEDENTARY_BOUT"], features_active_bout = config["STEP"]["FEATURES"]["ACTIVE_BOUT"], threshold_active_bout = config["STEP"]["THRESHOLD_ACTIVE_BOUT"], - include_zero_step_rows = config["STEP"]["INCLUDE_ZERO_STEP_ROWS"] + 
include_zero_step_rows = config["STEP"]["INCLUDE_ZERO_STEP_ROWS"], + exclude_sleep = config["STEP"]["EXCLUDE_SLEEP"]["EXCLUDE"], + exclude_sleep_type = config["STEP"]["EXCLUDE_SLEEP"]["TYPE"], + exclude_sleep_fixed_start = config["STEP"]["EXCLUDE_SLEEP"]["FIXED"]["START"], + exclude_sleep_fixed_end = config["STEP"]["EXCLUDE_SLEEP"]["FIXED"]["END"], output: "data/processed/{pid}/fitbit_step_{day_segment}.csv" script: diff --git a/src/features/fitbit_step_features.py b/src/features/fitbit_step_features.py index b90e19df..d1c3d8bc 100644 --- a/src/features/fitbit_step_features.py +++ b/src/features/fitbit_step_features.py @@ -1,18 +1,63 @@ import pandas as pd import numpy as np +import time from fitbit_step.fitbit_step_base import base_fitbit_step_features +def isInvalidTime(str_time): + try: + time.strptime(str_time, '%H:%M') + return False + except ValueError: + return True + +def isInMainSleep(local_date_time, sleep): + # sleep_period_container = sleep.query("local_start_date_time <= @local_date_time <= local_end_date_time") + sleep_period_container = sleep[(sleep["local_start_date_time"] <= local_date_time) & (local_date_time <= sleep["local_end_date_time"])] + if sleep_period_container.shape[0] >= 1: + return True + else: + return False + +def getStepsOutsideFitbitMainSleep(sleep, steps): + steps['inMainSleep'] = steps.apply(lambda row : isInMainSleep(row['local_date_time'], sleep), axis = 1) + return steps[steps['inMainSleep'] == False] + + +def getStepsOutsideFixedMainSleep(sleepStart, sleepEnd, steps): + steps = steps.set_index('local_date_time') + steps['inMainSleep'] = False + steps.loc[steps.between_time(sleepStart, sleepEnd).index, 'inMainSleep'] = True + steps.reset_index(level=0, inplace=True) + return steps[steps['inMainSleep'] == False] + step_data = pd.read_csv(snakemake.input["step_data"], parse_dates=["local_date_time", "local_date"]) day_segment = snakemake.params["day_segment"] threshold_active_bout = snakemake.params["threshold_active_bout"] 
include_zero_step_rows = snakemake.params["include_zero_step_rows"] -step_features = pd.DataFrame(columns=["local_date"]) +exclude_sleep = snakemake.params["exclude_sleep"] +exclude_sleep_type = snakemake.params["exclude_sleep_type"] +exclude_sleep_fixed_start = snakemake.params["exclude_sleep_fixed_start"] +exclude_sleep_fixed_end = snakemake.params["exclude_sleep_fixed_end"] +step_features = pd.DataFrame(columns=["local_date"]) requested_features = {} requested_features["features_all_steps"] = snakemake.params["features_all_steps"] requested_features["features_sedentary_bout"] = snakemake.params["features_sedentary_bout"] requested_features["features_active_bout"] = snakemake.params["features_active_bout"] +if exclude_sleep == True: + if exclude_sleep_type == "FIXED": + if isInvalidTime(exclude_sleep_fixed_start): + raise ValueError("Your fixed start time has an invalid format in your config.yml file") + if isInvalidTime(exclude_sleep_fixed_end): + raise ValueError("Your fixed end time has an invalid format in your config.yml file") + step_data = getStepsOutsideFixedMainSleep(exclude_sleep_fixed_start, exclude_sleep_fixed_end, step_data) + elif exclude_sleep_type == "FITBIT_BASED": + sleep_data = pd.read_csv(snakemake.input["sleep_data"], parse_dates=["local_start_date_time", "local_end_date_time"]) + step_data = getStepsOutsideFitbitMainSleep(sleep_data, step_data) + else: + raise ValueError("We only support FIXED or FITBIT_BASED to filter step data based on sleep data. You typed " + exclude_sleep_type + ", Check your config.yaml file for typos") + step_features = step_features.merge(base_fitbit_step_features(step_data, day_segment, requested_features, threshold_active_bout, include_zero_step_rows), on="local_date", how="outer")