Add the EXCLUDE_SLEEP module for steps intraday features

pull/136/head
Meng Li 2021-05-18 18:27:12 -04:00
parent 5e451f99b0
commit edf71e055d
16 changed files with 155 additions and 6 deletions

View File

@ -284,6 +284,12 @@ for provider in config["FITBIT_STEPS_SUMMARY"]["PROVIDERS"].keys():
for provider in config["FITBIT_STEPS_INTRADAY"]["PROVIDERS"].keys():
if config["FITBIT_STEPS_INTRADAY"]["PROVIDERS"][provider]["COMPUTE"]:
if config["FITBIT_STEPS_INTRADAY"]["EXCLUDE_SLEEP"]["TIME_BASED"]["EXCLUDE"] or config["FITBIT_STEPS_INTRADAY"]["EXCLUDE_SLEEP"]["FITBIT_BASED"]["EXCLUDE"]:
if config["FITBIT_STEPS_INTRADAY"]["EXCLUDE_SLEEP"]["FITBIT_BASED"]["EXCLUDE"]:
files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_summary_raw.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/interim/{pid}/fitbit_steps_intraday_with_datetime_exclude_sleep.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/raw/{pid}/fitbit_steps_intraday_raw.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/raw/{pid}/fitbit_steps_intraday_with_datetime.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/interim/{pid}/fitbit_steps_intraday_features/fitbit_steps_intraday_{language}_{provider_key}.csv", pid=config["PIDS"], language=get_script_language(config["FITBIT_STEPS_INTRADAY"]["PROVIDERS"][provider]["SRC_SCRIPT"]), provider_key=provider.lower()))

View File

@ -436,6 +436,13 @@ FITBIT_STEPS_SUMMARY:
# See https://www.rapids.science/latest/features/fitbit-steps-intraday/
FITBIT_STEPS_INTRADAY:
CONTAINER: steps_intraday
EXCLUDE_SLEEP: # you can exclude step data that was logged during sleep periods
TIME_BASED:
EXCLUDE: False
START_TIME: "23:00"
END_TIME: "07:00"
FITBIT_BASED:
EXCLUDE: False
PROVIDERS:
RAPIDS:
COMPUTE: False

View File

@ -2,6 +2,8 @@
## v1.2.1 (in progress)
- Fix PID matching when joining data from multiple participants. We can handle PIDS with an arbitrary format.
- Fix bug that did not correctly parse participants with more than 2 phones or more than 1 wearable
- New keyboard features
- Add the `EXCLUDE_SLEEP` module for steps intraday features
## v1.2.0
- Sleep summary and intraday features are more consistent.
- Add wake and bedtime features for sleep summary data.

View File

@ -5,7 +5,7 @@ Sensor parameters description for `[FITBIT_STEPS_INTRADAY]`:
|Key                              | Description |
|----------------|-----------------------------------------------------------------------------------------------------------------------------------
|`[CONTAINER]`| Container where your steps intraday data is stored, depending on the data stream you are using this can be a database table, a CSV file, etc. |
|`[EXCLUDE_SLEEP]` | Step data will be excluded if it was logged during sleep periods when at least one `[EXCLUDE]` flag is set to `True`. Sleep can be delimited by (1) a fixed period that repeats every day if `[TIME_BASED][EXCLUDE]` is True or (2) by Fitbit summary sleep episodes if `[FITBIT_BASED][EXCLUDE]` is True. If both are True (3), we use all Fitbit sleep episodes as well as the time-based episodes that do not overlap with any Fitbit episodes. If `[FITBIT_BASED][EXCLUDE]` is True, make sure the Fitbit sleep summary container points to a valid table or file. |
## RAPIDS provider
@ -16,6 +16,8 @@ Sensor parameters description for `[FITBIT_STEPS_INTRADAY]`:
```bash
- data/raw/{pid}/fitbit_steps_intraday_raw.csv
- data/raw/{pid}/fitbit_steps_intraday_with_datetime.csv
- data/raw/{pid}/fitbit_sleep_summary_raw.csv (Only when [EXCLUDE_SLEEP][FITBIT_BASED][EXCLUDE]=True)
- data/interim/{pid}/fitbit_steps_intraday_with_datetime_exclude_sleep.csv (Only when [EXCLUDE_SLEEP][TIME_BASED][EXCLUDE]=True or [EXCLUDE_SLEEP][FITBIT_BASED][EXCLUDE]=True)
- data/interim/{pid}/fitbit_steps_intraday_features/fitbit_steps_intraday_{language}_{provider_key}.csv
- data/processed/features/{pid}/fitbit_steps_intraday.csv
```

View File

@ -405,6 +405,13 @@ FITBIT_STEPS_SUMMARY:
# See https://www.rapids.science/latest/features/fitbit-steps-intraday/
FITBIT_STEPS_INTRADAY:
CONTAINER: fitbit_data.csv
EXCLUDE_SLEEP: # you can exclude step data that was logged during sleep periods
TIME_BASED:
EXCLUDE: False
START_TIME: "23:00"
END_TIME: "07:00"
FITBIT_BASED:
EXCLUDE: False
PROVIDERS:
RAPIDS:
COMPUTE: True

View File

@ -24,11 +24,17 @@ def find_features_files(wildcards):
return(feature_files)
def optional_steps_sleep_input(wildcards):
    """Return the optional sleep input of the exclude-sleep rule.

    The Fitbit sleep summary file is only requested when
    [FITBIT_STEPS_INTRADAY][EXCLUDE_SLEEP][FITBIT_BASED][EXCLUDE] is truthy;
    otherwise an empty list is returned so the rule has no sleep input.

    `wildcards` is accepted because this is used as a Snakemake input
    function; it is not read here. The "{pid}" placeholder is returned
    verbatim (presumably Snakemake fills it in -- confirm).
    """
    # Only the FITBIT_BASED flag needs sleep episodes; TIME_BASED exclusion
    # works from the configured START_TIME/END_TIME alone.
    if config["FITBIT_STEPS_INTRADAY"]["EXCLUDE_SLEEP"]["FITBIT_BASED"]["EXCLUDE"]:
        return "data/raw/{pid}/fitbit_sleep_summary_raw.csv"
    else:
        return []
def optional_steps_intraday_input(wildcards):
    """Pick the steps intraday file that the feature rules should read.

    When either [EXCLUDE_SLEEP] flag (TIME_BASED or FITBIT_BASED) is truthy
    the sleep-filtered interim file is returned; otherwise the unfiltered
    file with readable datetimes is returned. `wildcards` is not read here.
    """
    sleep_settings = config["FITBIT_STEPS_INTRADAY"]["EXCLUDE_SLEEP"]
    sleep_is_excluded = sleep_settings["TIME_BASED"]["EXCLUDE"] or sleep_settings["FITBIT_BASED"]["EXCLUDE"]
    if not sleep_is_excluded:
        return "data/raw/{pid}/fitbit_steps_intraday_with_datetime.csv"
    return "data/interim/{pid}/fitbit_steps_intraday_with_datetime_exclude_sleep.csv"
def input_merge_sensor_features_for_individual_participants(wildcards):
feature_files = []
for config_key in config.keys():

View File

@ -648,7 +648,7 @@ rule fitbit_steps_summary_r_features:
rule fitbit_steps_intraday_python_features:
input:
sensor_data = "data/raw/{pid}/fitbit_steps_intraday_with_datetime.csv",
sensor_data = optional_steps_intraday_input,
time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"
params:
provider = lambda wildcards: config["FITBIT_STEPS_INTRADAY"]["PROVIDERS"][wildcards.provider_key.upper()],
@ -661,7 +661,7 @@ rule fitbit_steps_intraday_python_features:
rule fitbit_steps_intraday_r_features:
input:
sensor_data = "data/raw/{pid}/fitbit_steps_intraday_with_datetime.csv",
sensor_data = optional_steps_intraday_input,
time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"
params:
provider = lambda wildcards: config["FITBIT_STEPS_INTRADAY"]["PROVIDERS"][wildcards.provider_key.upper()],

View File

@ -205,6 +205,17 @@ rule fitbit_readable_datetime:
script:
"../src/data/datetime/readable_datetime.R"
# Drop step rows that were logged during sleep periods, producing the interim
# file that the steps intraday feature rules read when sleep exclusion is on.
#   sensor_data   - the participant's steps intraday file with readable datetimes
#   sleep_data    - resolved by optional_steps_sleep_input: the Fitbit sleep summary
#                   file when FITBIT_BASED exclusion is enabled, otherwise an empty list
#   exclude_sleep - the whole [FITBIT_STEPS_INTRADAY][EXCLUDE_SLEEP] config section
rule fitbit_steps_intraday_exclude_sleep:
input:
sensor_data = "data/raw/{pid}/fitbit_steps_intraday_with_datetime.csv",
sleep_data = optional_steps_sleep_input
params:
exclude_sleep = config["FITBIT_STEPS_INTRADAY"]["EXCLUDE_SLEEP"]
output:
"data/interim/{pid}/fitbit_steps_intraday_with_datetime_exclude_sleep.csv"
script:
"../src/data/fitbit_steps_intraday_exclude_sleep.py"
rule empatica_readable_datetime:
input:
sensor_input = "data/raw/{pid}/empatica_{sensor}_raw.csv",

View File

@ -0,0 +1,47 @@
import pandas as pd
from datetime import timedelta
# Remove steps intraday rows that fall inside sleep periods.
#
# Sleep periods come from (1) a fixed daily window (TIME_BASED), (2) Fitbit
# sleep summary episodes (FITBIT_BASED), or (3) both: every Fitbit episode plus
# the fixed windows that do not overlap any Fitbit episode.
#
# Rows are filtered with boolean masks rather than one large DataFrame.query()
# string, so the work does not build an eval'd expression with one clause per
# sleep episode and does not depend on quoting data values into that string.

exclude_sleep = snakemake.params["exclude_sleep"]
exclude_time_based = exclude_sleep["TIME_BASED"]["EXCLUDE"]
exclude_fitbit_based = exclude_sleep["FITBIT_BASED"]["EXCLUDE"]
# START_TIME / END_TIME are configured as "HH:MM"; seconds are appended before
# comparing (presumably local_time / local_date_time end in HH:MM:SS -- confirm).
exclude_sleep_fixed_start = exclude_sleep["TIME_BASED"]["START_TIME"] + ":00"
exclude_sleep_fixed_end = exclude_sleep["TIME_BASED"]["END_TIME"] + ":00"
# A start at or after the end means the fixed window wraps past midnight.
# Note that START_TIME == END_TIME is treated as wrapping as well.
fixed_window_crosses_midnight = exclude_sleep_fixed_start >= exclude_sleep_fixed_end

steps_intraday_data = pd.read_csv(snakemake.input["sensor_data"], parse_dates=["local_date"])
if snakemake.input["sleep_data"]:
    # Episodes without both a start and an end cannot delimit an interval.
    sleep_data = pd.read_csv(snakemake.input["sleep_data"]).dropna(
        subset=["local_start_date_time", "local_end_date_time"], how="any"
    )
else:
    sleep_data = pd.DataFrame()

# Fitbit episodes are only usable when requested and actually present;
# otherwise TIME_BASED (if enabled) is applied on its own.
use_fitbit_episodes = exclude_fitbit_based and not sleep_data.empty

if not steps_intraday_data.empty:
    if use_fitbit_episodes:
        fitbit_episodes = list(zip(sleep_data["local_start_date_time"], sleep_data["local_end_date_time"]))
        excluded_intervals = []

        if exclude_time_based:
            # One fixed window per day, starting the day before the first
            # step date so a window that wraps past midnight covers day one.
            fixed_start_dates = pd.date_range(
                steps_intraday_data["local_date"].min() - timedelta(days=1),
                steps_intraday_data["local_date"].max(),
            )
            if fixed_window_crosses_midnight:
                fixed_end_dates = fixed_start_dates + timedelta(days=1)
            else:
                fixed_end_dates = fixed_start_dates
            fixed_time = pd.DataFrame({
                "fixed_start_date_time": fixed_start_dates.strftime("%Y-%m-%d") + " " + exclude_sleep_fixed_start,
                "fixed_end_date_time": fixed_end_dates.strftime("%Y-%m-%d") + " " + exclude_sleep_fixed_end,
            })

            # Discard fixed windows that intersect any Fitbit episode; the
            # Fitbit episode takes precedence for that night.
            overlaps_fitbit = pd.Series(False, index=fixed_time.index)
            for sleep_start, sleep_end in fitbit_episodes:
                overlaps_fitbit |= (
                    (fixed_time["fixed_start_date_time"] < sleep_end)
                    & (fixed_time["fixed_end_date_time"] > sleep_start)
                )
            fixed_time = fixed_time[~overlaps_fitbit]

            excluded_intervals.extend(
                zip(fixed_time["fixed_start_date_time"], fixed_time["fixed_end_date_time"])
            )

        excluded_intervals.extend(fitbit_episodes)

        # Keep a row only if it lies strictly outside every excluded interval.
        local_date_time = steps_intraday_data["local_date_time"]
        keep = pd.Series(True, index=steps_intraday_data.index)
        for interval_start, interval_end in excluded_intervals:
            keep &= (local_date_time < interval_start) | (local_date_time > interval_end)
        steps_intraday_data = steps_intraday_data[keep]

    elif exclude_time_based:
        local_time = steps_intraday_data["local_time"]
        before_start = local_time < exclude_sleep_fixed_start
        after_end = local_time > exclude_sleep_fixed_end
        if fixed_window_crosses_midnight:
            # Awake time is the single stretch between END_TIME and START_TIME.
            keep = before_start & after_end
        else:
            # Awake time is everything before START_TIME or after END_TIME.
            keep = before_start | after_end
        steps_intraday_data = steps_intraday_data[keep]

steps_intraday_data.to_csv(snakemake.output[0], index=False)

View File

@ -437,6 +437,13 @@ FITBIT_STEPS_SUMMARY:
# See https://www.rapids.science/latest/features/fitbit-steps-intraday/
FITBIT_STEPS_INTRADAY:
CONTAINER: steps_intraday
EXCLUDE_SLEEP: # you can exclude step data that was logged during sleep periods
TIME_BASED:
EXCLUDE: False
START_TIME: "23:00"
END_TIME: "07:00"
FITBIT_BASED:
EXCLUDE: False
PROVIDERS:
RAPIDS:
COMPUTE: False

View File

@ -438,6 +438,13 @@ FITBIT_STEPS_SUMMARY:
# See https://www.rapids.science/latest/features/fitbit-steps-intraday/
FITBIT_STEPS_INTRADAY:
CONTAINER: steps_intraday
EXCLUDE_SLEEP: # you can exclude step data that was logged during sleep periods
TIME_BASED:
EXCLUDE: False
START_TIME: "23:00"
END_TIME: "07:00"
FITBIT_BASED:
EXCLUDE: False
PROVIDERS:
RAPIDS:
COMPUTE: False

View File

@ -437,6 +437,13 @@ FITBIT_STEPS_SUMMARY:
# See https://www.rapids.science/latest/features/fitbit-steps-intraday/
FITBIT_STEPS_INTRADAY:
CONTAINER: steps_intraday
EXCLUDE_SLEEP: # you can exclude step data that was logged during sleep periods
TIME_BASED:
EXCLUDE: False
START_TIME: "23:00"
END_TIME: "07:00"
FITBIT_BASED:
EXCLUDE: False
PROVIDERS:
RAPIDS:
COMPUTE: False

View File

@ -437,6 +437,13 @@ FITBIT_STEPS_SUMMARY:
# See https://www.rapids.science/latest/features/fitbit-steps-intraday/
FITBIT_STEPS_INTRADAY:
CONTAINER: steps_intraday
EXCLUDE_SLEEP: # you can exclude step data that was logged during sleep periods
TIME_BASED:
EXCLUDE: False
START_TIME: "23:00"
END_TIME: "07:00"
FITBIT_BASED:
EXCLUDE: False
PROVIDERS:
RAPIDS:
COMPUTE: False

View File

@ -438,6 +438,13 @@ FITBIT_STEPS_SUMMARY:
# See https://www.rapids.science/latest/features/fitbit-steps-intraday/
FITBIT_STEPS_INTRADAY:
CONTAINER: steps_intraday
EXCLUDE_SLEEP: # you can exclude step data that was logged during sleep periods
TIME_BASED:
EXCLUDE: False
START_TIME: "23:00"
END_TIME: "07:00"
FITBIT_BASED:
EXCLUDE: False
PROVIDERS:
RAPIDS:
COMPUTE: False

View File

@ -437,6 +437,13 @@ FITBIT_STEPS_SUMMARY:
# See https://www.rapids.science/latest/features/fitbit-steps-intraday/
FITBIT_STEPS_INTRADAY:
CONTAINER: steps_intraday
EXCLUDE_SLEEP: # you can exclude step data that was logged during sleep periods
TIME_BASED:
EXCLUDE: False
START_TIME: "23:00"
END_TIME: "07:00"
FITBIT_BASED:
EXCLUDE: False
PROVIDERS:
RAPIDS:
COMPUTE: False

View File

@ -1090,10 +1090,29 @@ properties:
FITBIT_STEPS_INTRADAY:
type: object
required: [CONTAINER, PROVIDERS]
required: [CONTAINER, EXCLUDE_SLEEP, PROVIDERS]
properties:
CONTAINER:
type: string
EXCLUDE_SLEEP:
required: [TIME_BASED, FITBIT_BASED]
properties:
TIME_BASED:
required: [EXCLUDE, START_TIME, END_TIME]
properties:
EXCLUDE:
type: boolean
START_TIME:
type: string
pattern: "^([0-1][0-9]|2[0-3]):[0-5][0-9]$"
END_TIME:
type: string
pattern: "^([0-1][0-9]|2[0-3]):[0-5][0-9]$"
FITBIT_BASED:
required: [EXCLUDE]
properties:
EXCLUDE:
type: boolean
PROVIDERS:
type: ["null", object]
properties: