From edf71e055da3802f48525b7108e70e1725cf07b1 Mon Sep 17 00:00:00 2001 From: Meng Li <34143965+Meng6@users.noreply.github.com> Date: Tue, 18 May 2021 18:27:12 -0400 Subject: [PATCH] Add the EXCLUDE_SLEEP module for steps intraday features --- Snakefile | 6 +++ config.yaml | 7 +++ docs/change-log.md | 2 + docs/features/fitbit-steps-intraday.md | 4 +- example_profile/example_config.yaml | 7 +++ rules/common.smk | 10 +++- rules/features.smk | 4 +- rules/preprocessing.smk | 11 +++++ .../fitbit_steps_intraday_exclude_sleep.py | 47 +++++++++++++++++++ tests/settings/mtz_event_config.yaml | 7 +++ tests/settings/mtz_frequency_config.yaml | 7 +++ tests/settings/mtz_periodic_config.yaml | 7 +++ tests/settings/stz_event_config.yaml | 7 +++ tests/settings/stz_frequency_config.yaml | 7 +++ tests/settings/stz_periodic_config.yaml | 7 +++ tools/config.schema.yaml | 21 ++++++++- 16 files changed, 155 insertions(+), 6 deletions(-) create mode 100644 src/data/fitbit_steps_intraday_exclude_sleep.py diff --git a/Snakefile b/Snakefile index 1491240a..be9e1690 100644 --- a/Snakefile +++ b/Snakefile @@ -284,6 +284,12 @@ for provider in config["FITBIT_STEPS_SUMMARY"]["PROVIDERS"].keys(): for provider in config["FITBIT_STEPS_INTRADAY"]["PROVIDERS"].keys(): if config["FITBIT_STEPS_INTRADAY"]["PROVIDERS"][provider]["COMPUTE"]: + + if config["FITBIT_STEPS_INTRADAY"]["EXCLUDE_SLEEP"]["TIME_BASED"]["EXCLUDE"] or config["FITBIT_STEPS_INTRADAY"]["EXCLUDE_SLEEP"]["FITBIT_BASED"]["EXCLUDE"]: + if config["FITBIT_STEPS_INTRADAY"]["EXCLUDE_SLEEP"]["FITBIT_BASED"]["EXCLUDE"]: + files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_summary_raw.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/interim/{pid}/fitbit_steps_intraday_with_datetime_exclude_sleep.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/raw/{pid}/fitbit_steps_intraday_raw.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/raw/{pid}/fitbit_steps_intraday_with_datetime.csv", 
pid=config["PIDS"])) files_to_compute.extend(expand("data/interim/{pid}/fitbit_steps_intraday_features/fitbit_steps_intraday_{language}_{provider_key}.csv", pid=config["PIDS"], language=get_script_language(config["FITBIT_STEPS_INTRADAY"]["PROVIDERS"][provider]["SRC_SCRIPT"]), provider_key=provider.lower())) diff --git a/config.yaml b/config.yaml index d2b12c09..7a548078 100644 --- a/config.yaml +++ b/config.yaml @@ -436,6 +436,13 @@ FITBIT_STEPS_SUMMARY: # See https://www.rapids.science/latest/features/fitbit-steps-intraday/ FITBIT_STEPS_INTRADAY: CONTAINER: steps_intraday + EXCLUDE_SLEEP: # you can exclude step data that was logged during sleep periods + TIME_BASED: + EXCLUDE: False + START_TIME: "23:00" + END_TIME: "07:00" + FITBIT_BASED: + EXCLUDE: False PROVIDERS: RAPIDS: COMPUTE: False diff --git a/docs/change-log.md b/docs/change-log.md index b460ee24..cc444738 100644 --- a/docs/change-log.md +++ b/docs/change-log.md @@ -2,6 +2,8 @@ ## v1.2.1 (in progress) - Fix PID matching when joining data from multiple participants. We can handle PIDS with an arbitrary format. - Fix bug that did not correctly parse participants with more than 2 phones or more than 1 wearable +- New keyboard features +- Add the `EXCLUDE_SLEEP` module for steps intraday features ## v1.2.0 - Sleep summary and intraday features are more consistent. - Add wake and bedtime features for sleep summary data. 
diff --git a/docs/features/fitbit-steps-intraday.md b/docs/features/fitbit-steps-intraday.md index 4b9ab6f9..a2db9173 100644 --- a/docs/features/fitbit-steps-intraday.md +++ b/docs/features/fitbit-steps-intraday.md @@ -5,7 +5,7 @@ Sensor parameters description for `[FITBIT_STEPS_INTRADAY]`: |Key                              | Description | |----------------|----------------------------------------------------------------------------------------------------------------------------------- |`[CONTAINER]`| Container where your steps intraday data is stored, depending on the data stream you are using this can be a database table, a CSV file, etc. | - +|`[EXCLUDE_SLEEP]` | Step data will be excluded if it was logged during sleep periods when at least one `[EXCLUDE]` flag is set to `True`. Sleep can be delimited by (1) a fixed period that repeats every day if `[TIME_BASED][EXCLUDE]` is True or (2) by Fitbit summary sleep episodes if `[FITBIT_BASED][EXCLUDE]` is True. (3) If both are True, we use all Fitbit sleep episodes as well as the time-based episodes that do not overlap with any Fitbit episodes. If `[FITBIT_BASED][EXCLUDE]` is True, make sure the Fitbit sleep summary container points to a valid table or file. 
## RAPIDS provider @@ -16,6 +16,8 @@ Sensor parameters description for `[FITBIT_STEPS_INTRADAY]`: ```bash - data/raw/{pid}/fitbit_steps_intraday_raw.csv - data/raw/{pid}/fitbit_steps_intraday_with_datetime.csv + - data/raw/{pid}/fitbit_sleep_summary_raw.csv (Only when [EXCLUDE_SLEEP][FITBIT_BASED][EXCLUDE]=True) + - data/interim/{pid}/fitbit_steps_intraday_with_datetime_exclude_sleep.csv (Only when [EXCLUDE_SLEEP][TIME_BASED][EXCLUDE]=True or [EXCLUDE_SLEEP][FITBIT_BASED][EXCLUDE]=True) - data/interim/{pid}/fitbit_steps_intraday_features/fitbit_steps_intraday_{language}_{provider_key}.csv - data/processed/features/{pid}/fitbit_steps_intraday.csv ``` diff --git a/example_profile/example_config.yaml b/example_profile/example_config.yaml index 2ca8e045..33c6fa2e 100644 --- a/example_profile/example_config.yaml +++ b/example_profile/example_config.yaml @@ -405,6 +405,13 @@ FITBIT_STEPS_SUMMARY: # See https://www.rapids.science/latest/features/fitbit-steps-intraday/ FITBIT_STEPS_INTRADAY: CONTAINER: fitbit_data.csv + EXCLUDE_SLEEP: # you can exclude step data that was logged during sleep periods + TIME_BASED: + EXCLUDE: False + START_TIME: "23:00" + END_TIME: "07:00" + FITBIT_BASED: + EXCLUDE: False PROVIDERS: RAPIDS: COMPUTE: True diff --git a/rules/common.smk b/rules/common.smk index 6651677a..550b2507 100644 --- a/rules/common.smk +++ b/rules/common.smk @@ -24,11 +24,17 @@ def find_features_files(wildcards): return(feature_files) def optional_steps_sleep_input(wildcards): - if config["STEP"]["EXCLUDE_SLEEP"]["EXCLUDE"] == True and config["STEP"]["EXCLUDE_SLEEP"]["TYPE"] == "FITBIT_BASED": - return "data/raw/{pid}/fitbit_sleep_summary_with_datetime.csv" + if config["FITBIT_STEPS_INTRADAY"]["EXCLUDE_SLEEP"]["FITBIT_BASED"]["EXCLUDE"]: + return "data/raw/{pid}/fitbit_sleep_summary_raw.csv" else: return [] +def optional_steps_intraday_input(wildcards): + if config["FITBIT_STEPS_INTRADAY"]["EXCLUDE_SLEEP"]["TIME_BASED"]["EXCLUDE"] or 
config["FITBIT_STEPS_INTRADAY"]["EXCLUDE_SLEEP"]["FITBIT_BASED"]["EXCLUDE"]: + return "data/interim/{pid}/fitbit_steps_intraday_with_datetime_exclude_sleep.csv" + else: + return "data/raw/{pid}/fitbit_steps_intraday_with_datetime.csv" + def input_merge_sensor_features_for_individual_participants(wildcards): feature_files = [] for config_key in config.keys(): diff --git a/rules/features.smk b/rules/features.smk index 5a37ceaa..158c03e9 100644 --- a/rules/features.smk +++ b/rules/features.smk @@ -648,7 +648,7 @@ rule fitbit_steps_summary_r_features: rule fitbit_steps_intraday_python_features: input: - sensor_data = "data/raw/{pid}/fitbit_steps_intraday_with_datetime.csv", + sensor_data = optional_steps_intraday_input, time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv" params: provider = lambda wildcards: config["FITBIT_STEPS_INTRADAY"]["PROVIDERS"][wildcards.provider_key.upper()], @@ -661,7 +661,7 @@ rule fitbit_steps_intraday_python_features: rule fitbit_steps_intraday_r_features: input: - sensor_data = "data/raw/{pid}/fitbit_steps_intraday_with_datetime.csv", + sensor_data = optional_steps_intraday_input, time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv" params: provider = lambda wildcards: config["FITBIT_STEPS_INTRADAY"]["PROVIDERS"][wildcards.provider_key.upper()], diff --git a/rules/preprocessing.smk b/rules/preprocessing.smk index 5001734b..281ecb86 100644 --- a/rules/preprocessing.smk +++ b/rules/preprocessing.smk @@ -205,6 +205,17 @@ rule fitbit_readable_datetime: script: "../src/data/datetime/readable_datetime.R" +rule fitbit_steps_intraday_exclude_sleep: + input: + sensor_data = "data/raw/{pid}/fitbit_steps_intraday_with_datetime.csv", + sleep_data = optional_steps_sleep_input + params: + exclude_sleep = config["FITBIT_STEPS_INTRADAY"]["EXCLUDE_SLEEP"] + output: + "data/interim/{pid}/fitbit_steps_intraday_with_datetime_exclude_sleep.csv" + script: + 
"../src/data/fitbit_steps_intraday_exclude_sleep.py" + rule empatica_readable_datetime: input: sensor_input = "data/raw/{pid}/empatica_{sensor}_raw.csv", diff --git a/src/data/fitbit_steps_intraday_exclude_sleep.py b/src/data/fitbit_steps_intraday_exclude_sleep.py new file mode 100644 index 00000000..9e76e7c3 --- /dev/null +++ b/src/data/fitbit_steps_intraday_exclude_sleep.py @@ -0,0 +1,47 @@ +import pandas as pd +from datetime import timedelta + + +exclude_sleep = snakemake.params["exclude_sleep"] +exclude_time_based = exclude_sleep["TIME_BASED"]["EXCLUDE"] +exclude_fitbit_based = exclude_sleep["FITBIT_BASED"]["EXCLUDE"] +exclude_sleep_fixed_start = exclude_sleep["TIME_BASED"]["START_TIME"] + ":00" +exclude_sleep_fixed_end = exclude_sleep["TIME_BASED"]["END_TIME"] + ":00" + + +steps_intraday_data = pd.read_csv(snakemake.input["sensor_data"], parse_dates=["local_date"]) +sleep_data = pd.read_csv(snakemake.input["sleep_data"]).dropna(subset=["local_start_date_time", "local_end_date_time"], how="any") if snakemake.input["sleep_data"] else pd.DataFrame() + +if not steps_intraday_data.empty: + + if exclude_time_based and (not exclude_fitbit_based or (exclude_fitbit_based and sleep_data.empty)): + query_string = "local_time < @exclude_sleep_fixed_start " + ("&" if exclude_sleep_fixed_start >= exclude_sleep_fixed_end else "|") + " local_time > @exclude_sleep_fixed_end" + steps_intraday_data.query(query_string, inplace=True) + + elif exclude_fitbit_based and (not sleep_data.empty): + + queries = [] + + if exclude_time_based: + + # Get fixed intervals + fixed_start_dates = pd.date_range(steps_intraday_data["local_date"].min() - timedelta(days=1), steps_intraday_data["local_date"].max()) + fixed_end_dates = fixed_start_dates + timedelta(days=1) if exclude_sleep_fixed_start >= exclude_sleep_fixed_end else fixed_start_dates + + fixed_time = pd.DataFrame({"fixed_start_date_time": fixed_start_dates.strftime("%Y-%m-%d") + " " + exclude_sleep_fixed_start, + "fixed_end_date_time": 
fixed_end_dates.strftime("%Y-%m-%d") + " " + exclude_sleep_fixed_end}) + + # Remove fixed intervals that intersect with sleep intervals from the fixed sleep periods + sleep_data["query_intersect"] = "(fixed_start_date_time < '" + sleep_data["local_end_date_time"] + "' & fixed_end_date_time > '" + sleep_data["local_start_date_time"] + "')" + query_string_subtract = "~(" + " | ".join(sleep_data["query_intersect"].tolist()) + ")" + fixed_time.query(query_string_subtract, inplace=True) + + # Add TIME_BASED query to queries + queries = ("(local_date_time < '" + fixed_time["fixed_start_date_time"] + "' | local_date_time > '" + fixed_time["fixed_end_date_time"] + "')").tolist() + + # Add FITBIT_BASED query to queries + queries = queries + ("(local_date_time < '" + sleep_data["local_start_date_time"] + "' | local_date_time > '" + sleep_data["local_end_date_time"] + "')").tolist() + + steps_intraday_data.query(" & ".join(queries), inplace=True) + +steps_intraday_data.to_csv(snakemake.output[0], index=False) diff --git a/tests/settings/mtz_event_config.yaml b/tests/settings/mtz_event_config.yaml index 3100a88c..9a286ce9 100644 --- a/tests/settings/mtz_event_config.yaml +++ b/tests/settings/mtz_event_config.yaml @@ -437,6 +437,13 @@ FITBIT_STEPS_SUMMARY: # See https://www.rapids.science/latest/features/fitbit-steps-intraday/ FITBIT_STEPS_INTRADAY: CONTAINER: steps_intraday + EXCLUDE_SLEEP: # you can exclude step data that was logged during sleep periods + TIME_BASED: + EXCLUDE: False + START_TIME: "23:00" + END_TIME: "07:00" + FITBIT_BASED: + EXCLUDE: False PROVIDERS: RAPIDS: COMPUTE: False diff --git a/tests/settings/mtz_frequency_config.yaml b/tests/settings/mtz_frequency_config.yaml index d38c25dc..0c6482cc 100644 --- a/tests/settings/mtz_frequency_config.yaml +++ b/tests/settings/mtz_frequency_config.yaml @@ -438,6 +438,13 @@ FITBIT_STEPS_SUMMARY: # See https://www.rapids.science/latest/features/fitbit-steps-intraday/ FITBIT_STEPS_INTRADAY: CONTAINER: steps_intraday + 
EXCLUDE_SLEEP: # you can exclude step data that was logged during sleep periods + TIME_BASED: + EXCLUDE: False + START_TIME: "23:00" + END_TIME: "07:00" + FITBIT_BASED: + EXCLUDE: False PROVIDERS: RAPIDS: COMPUTE: False diff --git a/tests/settings/mtz_periodic_config.yaml b/tests/settings/mtz_periodic_config.yaml index 35e463ee..a492ce7d 100644 --- a/tests/settings/mtz_periodic_config.yaml +++ b/tests/settings/mtz_periodic_config.yaml @@ -437,6 +437,13 @@ FITBIT_STEPS_SUMMARY: # See https://www.rapids.science/latest/features/fitbit-steps-intraday/ FITBIT_STEPS_INTRADAY: CONTAINER: steps_intraday + EXCLUDE_SLEEP: # you can exclude step data that was logged during sleep periods + TIME_BASED: + EXCLUDE: False + START_TIME: "23:00" + END_TIME: "07:00" + FITBIT_BASED: + EXCLUDE: False PROVIDERS: RAPIDS: COMPUTE: False diff --git a/tests/settings/stz_event_config.yaml b/tests/settings/stz_event_config.yaml index 6ab81f09..7550821e 100644 --- a/tests/settings/stz_event_config.yaml +++ b/tests/settings/stz_event_config.yaml @@ -437,6 +437,13 @@ FITBIT_STEPS_SUMMARY: # See https://www.rapids.science/latest/features/fitbit-steps-intraday/ FITBIT_STEPS_INTRADAY: CONTAINER: steps_intraday + EXCLUDE_SLEEP: # you can exclude step data that was logged during sleep periods + TIME_BASED: + EXCLUDE: False + START_TIME: "23:00" + END_TIME: "07:00" + FITBIT_BASED: + EXCLUDE: False PROVIDERS: RAPIDS: COMPUTE: False diff --git a/tests/settings/stz_frequency_config.yaml b/tests/settings/stz_frequency_config.yaml index 88ff8126..56093b2b 100644 --- a/tests/settings/stz_frequency_config.yaml +++ b/tests/settings/stz_frequency_config.yaml @@ -438,6 +438,13 @@ FITBIT_STEPS_SUMMARY: # See https://www.rapids.science/latest/features/fitbit-steps-intraday/ FITBIT_STEPS_INTRADAY: CONTAINER: steps_intraday + EXCLUDE_SLEEP: # you can exclude step data that was logged during sleep periods + TIME_BASED: + EXCLUDE: False + START_TIME: "23:00" + END_TIME: "07:00" + FITBIT_BASED: + EXCLUDE: False 
PROVIDERS: RAPIDS: COMPUTE: False diff --git a/tests/settings/stz_periodic_config.yaml b/tests/settings/stz_periodic_config.yaml index 8b583941..d6d47d88 100644 --- a/tests/settings/stz_periodic_config.yaml +++ b/tests/settings/stz_periodic_config.yaml @@ -437,6 +437,13 @@ FITBIT_STEPS_SUMMARY: # See https://www.rapids.science/latest/features/fitbit-steps-intraday/ FITBIT_STEPS_INTRADAY: CONTAINER: steps_intraday + EXCLUDE_SLEEP: # you can exclude step data that was logged during sleep periods + TIME_BASED: + EXCLUDE: False + START_TIME: "23:00" + END_TIME: "07:00" + FITBIT_BASED: + EXCLUDE: False PROVIDERS: RAPIDS: COMPUTE: False diff --git a/tools/config.schema.yaml b/tools/config.schema.yaml index bbbc9ceb..fac0301d 100644 --- a/tools/config.schema.yaml +++ b/tools/config.schema.yaml @@ -1090,10 +1090,29 @@ properties: FITBIT_STEPS_INTRADAY: type: object - required: [CONTAINER, PROVIDERS] + required: [CONTAINER, EXCLUDE_SLEEP, PROVIDERS] properties: CONTAINER: type: string + EXCLUDE_SLEEP: + required: [TIME_BASED, FITBIT_BASED] + properties: + TIME_BASED: + required: [EXCLUDE, START_TIME, END_TIME] + properties: + EXCLUDE: + type: boolean + START_TIME: + type: string + pattern: "^([0-1][0-9]|2[0-3]):[0-5][0-9]$" + END_TIME: + type: string + pattern: "^([0-1][0-9]|2[0-3]):[0-5][0-9]$" + FITBIT_BASED: + required: [EXCLUDE] + properties: + EXCLUDE: + type: boolean PROVIDERS: type: ["null", object] properties: