Add functionality to exclude step data based on sleep

pull/95/head
JulioV 2020-06-24 13:33:58 -04:00
parent fd3aedf634
commit 1bdb52dd65
5 changed files with 73 additions and 4 deletions

View File

@ -83,13 +83,15 @@ if config["HEARTRATE"]["COMPUTE"]:
files_to_compute.extend(expand("data/processed/{pid}/fitbit_heartrate_{day_segment}.csv", pid = config["PIDS"], day_segment = config["HEARTRATE"]["DAY_SEGMENTS"])) files_to_compute.extend(expand("data/processed/{pid}/fitbit_heartrate_{day_segment}.csv", pid = config["PIDS"], day_segment = config["HEARTRATE"]["DAY_SEGMENTS"]))
if config["STEP"]["COMPUTE"]: if config["STEP"]["COMPUTE"]:
if config["STEP"]["EXCLUDE_SLEEP"]["EXCLUDE"] == True and config["STEP"]["EXCLUDE_SLEEP"]["TYPE"] == "FITBIT_BASED":
files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_{fitbit_data_type}_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["summary"]))
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["STEP"]["DB_TABLE"])) files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["STEP"]["DB_TABLE"]))
files_to_compute.extend(expand("data/raw/{pid}/fitbit_step_{fitbit_data_type}_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["intraday"])) files_to_compute.extend(expand("data/raw/{pid}/fitbit_step_{fitbit_data_type}_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["intraday"]))
files_to_compute.extend(expand("data/processed/{pid}/fitbit_step_{day_segment}.csv", pid = config["PIDS"], day_segment = config["STEP"]["DAY_SEGMENTS"])) files_to_compute.extend(expand("data/processed/{pid}/fitbit_step_{day_segment}.csv", pid = config["PIDS"], day_segment = config["STEP"]["DAY_SEGMENTS"]))
if config["SLEEP"]["COMPUTE"]: if config["SLEEP"]["COMPUTE"]:
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["SLEEP"]["DB_TABLE"])) files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["SLEEP"]["DB_TABLE"]))
files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_{fitbit_data_type}_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["intraday"])) files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_{fitbit_data_type}_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["intraday", "summary"]))
files_to_compute.extend(expand("data/processed/{pid}/fitbit_sleep_{day_segment}.csv", pid = config["PIDS"], day_segment = config["SLEEP"]["DAY_SEGMENTS"])) files_to_compute.extend(expand("data/processed/{pid}/fitbit_sleep_{day_segment}.csv", pid = config["PIDS"], day_segment = config["SLEEP"]["DAY_SEGMENTS"]))
if config["CONVERSATION"]["COMPUTE"]: if config["CONVERSATION"]["COMPUTE"]:

View File

@ -146,6 +146,12 @@ STEP:
COMPUTE: False COMPUTE: False
DB_TABLE: fitbit_data DB_TABLE: fitbit_data
DAY_SEGMENTS: *day_segments DAY_SEGMENTS: *day_segments
EXCLUDE_SLEEP:
EXCLUDE: True
TYPE: FIXED # FIXED OR FITBIT_BASED (CONFIGURE FITBIT's SLEEP DB_TABLE)
FIXED:
START: "23:00"
END: "07:00"
FEATURES: FEATURES:
ALL_STEPS: ["sumallsteps", "maxallsteps", "minallsteps", "avgallsteps", "stdallsteps"] ALL_STEPS: ["sumallsteps", "maxallsteps", "minallsteps", "avgallsteps", "stdallsteps"]
SEDENTARY_BOUT: ["countsedentarybout", "maxdurationsedentarybout", "mindurationsedentarybout", "avgdurationsedentarybout", "stddurationsedentarybout", "sumdurationsedentarybout"] SEDENTARY_BOUT: ["countsedentarybout", "maxdurationsedentarybout", "mindurationsedentarybout", "avgdurationsedentarybout", "stddurationsedentarybout", "sumdurationsedentarybout"]

View File

@ -919,6 +919,11 @@ Name Description
day_segment The particular ``day_segment`` that will be analyzed. The available options are ``daily``, ``morning``, ``afternoon``, ``evening``, ``night`` day_segment The particular ``day_segment`` that will be analyzed. The available options are ``daily``, ``morning``, ``afternoon``, ``evening``, ``night``
features The features that can be computed. See :ref:`Available Fitbit: Steps Features <fitbit-steps-available-features>` Table below features The features that can be computed. See :ref:`Available Fitbit: Steps Features <fitbit-steps-available-features>` Table below
threshold_active_bout Every minute with Fitbit step data will be labelled as ``sedentary`` if its step count is below this threshold, otherwise, ``active``. threshold_active_bout Every minute with Fitbit step data will be labelled as ``sedentary`` if its step count is below this threshold, otherwise, ``active``.
include_zero_step_rows Whether or not to include day segments with a 0 step count
exclude_sleep Whether or not to exclude step rows that happen during sleep
exclude_sleep_type If ``exclude_sleep`` is True, then you can choose between ``FIXED`` or ``FITBIT_BASED``. ``FIXED`` will exclude all step rows that happen between a start and end time (see below). ``FITBIT_BASED`` will exclude step rows that happen during main sleep segments as measured by the Fitbit device (``config[SLEEP][DB_TABLE]`` should be a valid table in your database, it usually is the same table that contains your STEP data)
exclude_sleep_fixed_start Start time of the fixed sleep period to exclude. Only relevant if ``exclude_sleep`` is True and ``exclude_sleep_type`` is ``FIXED``
exclude_sleep_fixed_end End time of the fixed sleep period to exclude. Only relevant if ``exclude_sleep`` is True and ``exclude_sleep_type`` is ``FIXED``
======================= =================== ======================= ===================
.. _fitbit-steps-available-features: .. _fitbit-steps-available-features:

View File

@ -26,6 +26,12 @@ def optional_location_input(wildcards):
else: else:
return expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["BARNETT_LOCATION"]["DB_TABLE"]) return expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["BARNETT_LOCATION"]["DB_TABLE"])
def optional_steps_sleep_input(wildcards):
    """Return the optional sleep-summary input for the step features rule.

    The Fitbit sleep summary file is only requested when step rows must be
    excluded based on Fitbit-measured sleep (``EXCLUDE`` is True and ``TYPE``
    is ``FITBIT_BASED`` under ``config["STEP"]["EXCLUDE_SLEEP"]``); in every
    other case an empty list is returned so no extra input is required.

    ``wildcards`` is accepted because this is used as an input function but
    is not read here; the returned path keeps its ``{pid}`` placeholder.
    """
    sleep_cfg = config["STEP"]["EXCLUDE_SLEEP"]
    needs_fitbit_sleep = sleep_cfg["EXCLUDE"] == True and sleep_cfg["TYPE"] == "FITBIT_BASED"
    if not needs_fitbit_sleep:
        return []
    return "data/raw/{pid}/fitbit_sleep_summary_with_datetime.csv"
rule messages_features: rule messages_features:
input: input:
expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["MESSAGES"]["DB_TABLE"]) expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["MESSAGES"]["DB_TABLE"])
@ -226,14 +232,19 @@ rule fitbit_heartrate_features:
rule fitbit_step_features: rule fitbit_step_features:
input: input:
step_data = "data/raw/{pid}/fitbit_step_intraday_with_datetime.csv" step_data = "data/raw/{pid}/fitbit_step_intraday_with_datetime.csv",
sleep_data = optional_steps_sleep_input
params: params:
day_segment = "{day_segment}", day_segment = "{day_segment}",
features_all_steps = config["STEP"]["FEATURES"]["ALL_STEPS"], features_all_steps = config["STEP"]["FEATURES"]["ALL_STEPS"],
features_sedentary_bout = config["STEP"]["FEATURES"]["SEDENTARY_BOUT"], features_sedentary_bout = config["STEP"]["FEATURES"]["SEDENTARY_BOUT"],
features_active_bout = config["STEP"]["FEATURES"]["ACTIVE_BOUT"], features_active_bout = config["STEP"]["FEATURES"]["ACTIVE_BOUT"],
threshold_active_bout = config["STEP"]["THRESHOLD_ACTIVE_BOUT"], threshold_active_bout = config["STEP"]["THRESHOLD_ACTIVE_BOUT"],
include_zero_step_rows = config["STEP"]["INCLUDE_ZERO_STEP_ROWS"] include_zero_step_rows = config["STEP"]["INCLUDE_ZERO_STEP_ROWS"],
exclude_sleep = config["STEP"]["EXCLUDE_SLEEP"]["EXCLUDE"],
exclude_sleep_type = config["STEP"]["EXCLUDE_SLEEP"]["TYPE"],
exclude_sleep_fixed_start = config["STEP"]["EXCLUDE_SLEEP"]["FIXED"]["START"],
exclude_sleep_fixed_end = config["STEP"]["EXCLUDE_SLEEP"]["FIXED"]["END"],
output: output:
"data/processed/{pid}/fitbit_step_{day_segment}.csv" "data/processed/{pid}/fitbit_step_{day_segment}.csv"
script: script:

View File

@ -1,18 +1,63 @@
import pandas as pd import pandas as pd
import numpy as np import numpy as np
import time
from fitbit_step.fitbit_step_base import base_fitbit_step_features from fitbit_step.fitbit_step_base import base_fitbit_step_features
def isInvalidTime(str_time):
    """Return True when ``str_time`` is not a valid ``HH:MM`` time string.

    Parameters
    ----------
    str_time : object
        Value to validate, expected to be a string such as ``"23:00"``.

    Returns
    -------
    bool
        False if ``str_time`` parses with the ``%H:%M`` format, True otherwise.
    """
    try:
        time.strptime(str_time, '%H:%M')
    except (ValueError, TypeError):
        # TypeError covers non-string values (e.g. None, or a number produced
        # by a config loader for an unquoted time); they are invalid too and
        # should be reported as such instead of crashing the validation.
        return True
    return False
def isInMainSleep(local_date_time, sleep):
    """Tell whether ``local_date_time`` falls inside any row of ``sleep``.

    A row matches when ``local_start_date_time <= local_date_time <=
    local_end_date_time`` (both bounds inclusive).

    Returns True if at least one row of ``sleep`` matches, False otherwise.
    """
    started = sleep["local_start_date_time"] <= local_date_time
    not_ended = local_date_time <= sleep["local_end_date_time"]
    matching_periods = sleep[started & not_ended]
    return len(matching_periods) > 0
def getStepsOutsideFitbitMainSleep(sleep, steps):
    """Return the rows of ``steps`` that do not fall inside any sleep period.

    Parameters
    ----------
    sleep : pandas.DataFrame
        Sleep periods with ``local_start_date_time`` and
        ``local_end_date_time`` columns.
    steps : pandas.DataFrame
        Step rows with a ``local_date_time`` column. As before, an
        ``inMainSleep`` boolean column is added to this frame in place.

    Returns
    -------
    pandas.DataFrame
        The rows of ``steps`` whose ``inMainSleep`` flag is False. A row is
        flagged when its timestamp lies within a sleep period, bounds
        inclusive.
    """
    period_starts = sleep["local_start_date_time"]
    period_ends = sleep["local_end_date_time"]
    # Build the flags from the timestamp column instead of a row-wise
    # DataFrame.apply: on an empty frame apply(axis=1) returns a DataFrame,
    # which cannot be assigned to a single column and used to crash here.
    flags = [
        bool(((period_starts <= moment) & (moment <= period_ends)).any())
        for moment in steps["local_date_time"]
    ]
    steps["inMainSleep"] = pd.Series(flags, index=steps.index, dtype=bool)
    return steps[steps["inMainSleep"] == False]
def getStepsOutsideFixedMainSleep(sleepStart, sleepEnd, steps):
    """Return the rows of ``steps`` outside a fixed daily sleep window.

    ``steps`` is re-indexed by its ``local_date_time`` column, rows whose time
    of day is selected by ``between_time(sleepStart, sleepEnd)`` are flagged
    in a new ``inMainSleep`` column, and only the unflagged rows are returned
    (with ``local_date_time`` restored as a regular column). The frame passed
    in by the caller is not modified.
    """
    by_time = steps.set_index('local_date_time')
    asleep_index = by_time.between_time(sleepStart, sleepEnd).index
    by_time['inMainSleep'] = False
    by_time.loc[asleep_index, 'inMainSleep'] = True
    flagged = by_time.reset_index(level=0)
    awake_rows = flagged['inMainSleep'] == False
    return flagged[awake_rows]
step_data = pd.read_csv(snakemake.input["step_data"], parse_dates=["local_date_time", "local_date"]) step_data = pd.read_csv(snakemake.input["step_data"], parse_dates=["local_date_time", "local_date"])
day_segment = snakemake.params["day_segment"] day_segment = snakemake.params["day_segment"]
threshold_active_bout = snakemake.params["threshold_active_bout"] threshold_active_bout = snakemake.params["threshold_active_bout"]
include_zero_step_rows = snakemake.params["include_zero_step_rows"] include_zero_step_rows = snakemake.params["include_zero_step_rows"]
step_features = pd.DataFrame(columns=["local_date"]) exclude_sleep = snakemake.params["exclude_sleep"]
exclude_sleep_type = snakemake.params["exclude_sleep_type"]
exclude_sleep_fixed_start = snakemake.params["exclude_sleep_fixed_start"]
exclude_sleep_fixed_end = snakemake.params["exclude_sleep_fixed_end"]
step_features = pd.DataFrame(columns=["local_date"])
requested_features = {} requested_features = {}
requested_features["features_all_steps"] = snakemake.params["features_all_steps"] requested_features["features_all_steps"] = snakemake.params["features_all_steps"]
requested_features["features_sedentary_bout"] = snakemake.params["features_sedentary_bout"] requested_features["features_sedentary_bout"] = snakemake.params["features_sedentary_bout"]
requested_features["features_active_bout"] = snakemake.params["features_active_bout"] requested_features["features_active_bout"] = snakemake.params["features_active_bout"]
# Optionally drop the step rows recorded during sleep before any feature is
# computed. FIXED uses a daily start/end time window from the configuration;
# FITBIT_BASED uses the sleep periods read from the sleep_data input file.
if exclude_sleep == True:
    if exclude_sleep_type == "FIXED":
        # Validate both boundaries first so a typo fails with a clear message
        # instead of an error raised deep inside pandas.
        if isInvalidTime(exclude_sleep_fixed_start):
            raise ValueError("Your fixed start time has an invalid format in your config.yaml file")
        if isInvalidTime(exclude_sleep_fixed_end):
            raise ValueError("Your fixed end time has an invalid format in your config.yaml file")
        step_data = getStepsOutsideFixedMainSleep(exclude_sleep_fixed_start, exclude_sleep_fixed_end, step_data)
    elif exclude_sleep_type == "FITBIT_BASED":
        sleep_data = pd.read_csv(snakemake.input["sleep_data"], parse_dates=["local_start_date_time", "local_end_date_time"])
        step_data = getStepsOutsideFitbitMainSleep(sleep_data, step_data)
    else:
        # str() keeps this ValueError reachable when the configured type is
        # not a string (e.g. left empty), instead of failing on concatenation.
        raise ValueError("We only support FIXED or FITBIT_BASED to filter step data based on sleep data. You typed " + str(exclude_sleep_type) + ", Check your config.yaml file for typos")
step_features = step_features.merge(base_fitbit_step_features(step_data, day_segment, requested_features, threshold_active_bout, include_zero_step_rows), on="local_date", how="outer") step_features = step_features.merge(base_fitbit_step_features(step_data, day_segment, requested_features, threshold_active_bout, include_zero_step_rows), on="local_date", how="outer")