diff --git a/config.yaml b/config.yaml index d8f8c764..be93cd01 100644 --- a/config.yaml +++ b/config.yaml @@ -321,19 +321,19 @@ FITBIT_DATA_STREAMS: # AVAILABLE: fitbitjson_mysql: DATABASE_GROUP: MY_GROUP - SLEEP_SUMMARY_EPISODE_DAY_ANCHOR: end # summary sleep episodes are considered as events based on either the start timestamp or end timestamp. + SLEEP_SUMMARY_LAST_NIGHT_END: 660 # a number ranging from 0 (midnight) to 1439 (23:59) that denotes the number of minutes after midnight. By default, 660 (11:00). fitbitparsed_mysql: DATABASE_GROUP: MY_GROUP - SLEEP_SUMMARY_EPISODE_DAY_ANCHOR: end # summary sleep episodes are considered as events based on either the start timestamp or end timestamp. - + SLEEP_SUMMARY_LAST_NIGHT_END: 660 # a number ranging from 0 (midnight) to 1439 (23:59) that denotes the number of minutes after midnight. By default, 660 (11:00). + fitbitjson_csv: FOLDER: data/external/fitbit_csv - SLEEP_SUMMARY_EPISODE_DAY_ANCHOR: end # summary sleep episodes are considered as events based on either the start timestamp or end timestamp. + SLEEP_SUMMARY_LAST_NIGHT_END: 660 # a number ranging from 0 (midnight) to 1439 (23:59) that denotes the number of minutes after midnight. By default, 660 (11:00). fitbitparsed_csv: FOLDER: data/external/fitbit_csv - SLEEP_SUMMARY_EPISODE_DAY_ANCHOR: end # summary sleep episodes are considered as events based on either the start timestamp or end timestamp. + SLEEP_SUMMARY_LAST_NIGHT_END: 660 # a number ranging from 0 (midnight) to 1439 (23:59) that denotes the number of minutes after midnight. By default, 660 (11:00). 
# Sensors ------ @@ -386,7 +386,7 @@ FITBIT_SLEEP_SUMMARY: PROVIDERS: RAPIDS: COMPUTE: False - FEATURES: ["countepisode", "avgefficiency", "sumdurationafterwakeup", "sumdurationasleep", "sumdurationawake", "sumdurationtofallasleep", "sumdurationinbed", "avgdurationafterwakeup", "avgdurationasleep", "avgdurationawake", "avgdurationtofallasleep", "avgdurationinbed"] + FEATURES: ["firstwaketime", "lastwaketime", "firstbedtime", "lastbedtime", "countepisode", "avgefficiency", "sumdurationafterwakeup", "sumdurationasleep", "sumdurationawake", "sumdurationtofallasleep", "sumdurationinbed", "avgdurationafterwakeup", "avgdurationasleep", "avgdurationawake", "avgdurationtofallasleep", "avgdurationinbed"] SLEEP_TYPES: ["main", "nap", "all"] SRC_SCRIPT: src/features/fitbit_sleep_summary/rapids/main.py @@ -397,31 +397,30 @@ FITBIT_SLEEP_INTRADAY: RAPIDS: COMPUTE: False FEATURES: - LEVELS_AND_TYPES_COMBINING_ALL: True LEVELS_AND_TYPES: [countepisode, sumduration, maxduration, minduration, avgduration, medianduration, stdduration] RATIOS_TYPE: [count, duration] RATIOS_SCOPE: [ACROSS_LEVELS, ACROSS_TYPES, WITHIN_LEVELS, WITHIN_TYPES] ROUTINE: [starttimefirstmainsleep, endtimelastmainsleep, starttimefirstnap, endtimelastnap] SLEEP_LEVELS: + INCLUDE_ALL_GROUPS: True CLASSIC: [awake, restless, asleep] STAGES: [wake, deep, light, rem] UNIFIED: [awake, asleep] - SLEEP_TYPES: [main, nap] - INCLUDE_SLEEP_LATER_THAN: 0 # a number ranged from 0 (midnight) to 1439 (23:59) - REFERENCE_TIME: MIDNIGHT # chosen from "MIDNIGHT" and "START_OF_THE_SEGMENT" + SLEEP_TYPES: [main, nap, all] + LAST_NIGHT_END: 0 # a number ranging from 0 (midnight) to 1439 (23:59) that denotes the number of minutes after midnight + ROUTINE_REFERENCE_TIME: MIDNIGHT # chosen from "MIDNIGHT" and "START_OF_THE_SEGMENT" SRC_SCRIPT: src/features/fitbit_sleep_intraday/rapids/main.py PRICE: COMPUTE: False - FEATURES: [avgduration, avgratioduration, avgstarttimeofepisodemain, avgendtimeofepisodemain, avgmidpointofepisodemain, 
"stdstarttimeofepisodemain", "stdendtimeofepisodemain", "stdmidpointofepisodemain", socialjetlag, meanssdstarttimeofepisodemain, meanssdendtimeofepisodemain, meanssdmidpointofepisodemain, medianssdstarttimeofepisodemain, medianssdendtimeofepisodemain, medianssdmidpointofepisodemain] + FEATURES: [avgduration, avgratioduration, avgstarttimeofepisodemain, avgendtimeofepisodemain, avgmidpointofepisodemain, stdstarttimeofepisodemain, stdendtimeofepisodemain, stdmidpointofepisodemain, socialjetlag, rmssdmeanstarttimeofepisodemain, rmssdmeanendtimeofepisodemain, rmssdmeanmidpointofepisodemain, rmssdmedianstarttimeofepisodemain, rmssdmedianendtimeofepisodemain, rmssdmedianmidpointofepisodemain] SLEEP_LEVELS: + INCLUDE_ALL_GROUPS: True CLASSIC: [awake, restless, asleep] STAGES: [wake, deep, light, rem] UNIFIED: [awake, asleep] DAY_TYPES: [WEEKEND, WEEK, ALL] - GROUP_EPISODES_WITHIN: # by default: today's 6pm to tomorrow's noon - START_TIME: 1080 # number of minutes after the midnight (18:00) 18*60 - LENGTH: 1080 # in minutes (18 hours) 18*60 + LAST_NIGHT_END: 660 # number of minutes after the midnight (11:00) 11*60 SRC_SCRIPT: src/features/fitbit_sleep_intraday/price/main.py # See https://www.rapids.science/latest/features/fitbit-steps-summary/ diff --git a/docs/change-log.md b/docs/change-log.md index 91c42676..c452e600 100644 --- a/docs/change-log.md +++ b/docs/change-log.md @@ -1,5 +1,9 @@ # Change Log +## v1.2.0 +- Sleep summary and intraday features are more consistent. +- Add wake and bedtime features for sleep summary data. +- Fix bugs with sleep PRICE features. 
## v1.1.1 - Fix length of periodic segments on days with DLS - Fix crash when scraping data for an app that does not exist diff --git a/docs/features/fitbit-sleep-intraday.md b/docs/features/fitbit-sleep-intraday.md index 4ff44789..a0320a1d 100644 --- a/docs/features/fitbit-sleep-intraday.md +++ b/docs/features/fitbit-sleep-intraday.md @@ -8,6 +8,10 @@ Sensor parameters description for `[FITBIT_SLEEP_INTRADAY]`: ## RAPIDS provider +!!! hint "Understanding RAPIDS features" + [This diagram](../../img/sleep_intraday_rapids.png) will help you understand how sleep episodes are chunked and grouped within time segments and `LNE-LNE` intervals for the RAPIDS provider. + + !!! info "Available time segments" - Available for all time segments @@ -29,23 +33,23 @@ Parameters description for `[FITBIT_SLEEP_INTRADAY][PROVIDERS][RAPIDS]`: |----------------|----------------------------------------------------------------------------------------------------------------------------------- |`[COMPUTE]` | Set to `True` to extract `FITBIT_SLEEP_INTRADAY` features from the `RAPIDS` provider| |`[FEATURES]` | Features to be computed from sleep intraday data, see table below | -|`[SLEEP_LEVELS]` | Fitbit’s sleep API Version 1 only provides `CLASSIC` records. However, Version 1.2 provides 2 types of records: `CLASSIC` and `STAGES`. `STAGES` is only available in devices with a heart rate sensor and even those devices will fail to report it if the battery is low or the device is not tight enough. While `CLASSIC` contains 3 sleep levels (`awake`, `restless`, and `asleep`), `STAGES` contains 4 sleep levels (`wake`, `deep`, `light`, `rem`). To make it consistent, RAPIDS grouped them into 2 `UNIFIED` sleep levels: `awake` (`CLASSIC`: `awake` and `restless`; `STAGES`: `wake`) and `asleep` (`CLASSIC`: `asleep`; `STAGES`: `deep`, `light`, and `rem`). -|`[SLEEP_TYPES]` | Types of sleep to be included in the feature extraction computation. Fitbit provides 2 types of sleep: `main`, `nap`. 
-|`[INCLUDE_SLEEP_LATER_THAN]`| All resampled sleep rows (bin interval: one minute) that started after this time will be included in the feature computation. It is a number ranging from 0 (midnight) to 1439 (23:59) which denotes the number of minutes after midnight. If a segment is longer than one day, this value is for every day. -|`[REFERENCE_TIME]`| The reference point from which the `[ROUTINE]` features are to be computed. Chosen from `MIDNIGHT` and `START_OF_THE_SEGMENT`, default is `MIDNIGHT`. If you have multiple time segments per day it might be more informative to set this flag to `START_OF_THE_SEGMENT`. +|`[SLEEP_LEVELS]` | Fitbit’s sleep API Version 1 only provides `CLASSIC` records. However, Version 1.2 provides 2 types of records: `CLASSIC` and `STAGES`. `STAGES` is only available in devices with a heart rate sensor and even those devices will fail to report it if the battery is low or the device is not tight enough. While `CLASSIC` contains 3 sleep levels (`awake`, `restless`, and `asleep`), `STAGES` contains 4 sleep levels (`wake`, `deep`, `light`, `rem`). To make it consistent, RAPIDS groups them into 2 `UNIFIED` sleep levels: `awake` (`CLASSIC`: `awake` and `restless`; `STAGES`: `wake`) and `asleep` (`CLASSIC`: `asleep`; `STAGES`: `deep`, `light`, and `rem`). In this section, there is a boolean flag named `INCLUDE_ALL_GROUPS` that if set to TRUE, computes LEVELS_AND_TYPES features grouping all levels together in a single `all` category. +|`[SLEEP_TYPES]` | Types of sleep to be included in the feature extraction computation. There are three sleep types: `main`, `nap`, and `all`. The `all` type means both main sleep and naps are considered. +|`[LAST_NIGHT_END]`| All resampled sleep rows (bin interval: one minute) that started after this time will be included in the feature computation. It ranges from 0 (midnight) to 1439 (23:59) which denotes the number of minutes after midnight. If a segment is longer than one day, this value is applied every day. 
+|`[ROUTINE_REFERENCE_TIME]`| The reference point from which the `[ROUTINE]` features are computed, it can be `MIDNIGHT` or `START_OF_THE_SEGMENT`, default is `MIDNIGHT`. If you have multiple time segments per day it might be more informative to set this flag to `START_OF_THE_SEGMENT`. Features description for `[FITBIT_SLEEP_INTRADAY][PROVIDERS][RAPIDS][LEVELS_AND_TYPES]`: |Feature                                           |Units |Description | |------------------------------- |-------------- |-------------------------------------------------------------| -|countepisode`[LEVEL][TYPE]` |episodes |Number of `[LEVEL][TYPE]`sleep episodes. `[LEVEL]`is one of `[SLEEP_LEVELS]` (e.g. awake-classic or rem-stages) and `[TYPE]` is one of `[SLEEP_TYPES]` (e.g. main). Both `[LEVEL]`and `[TYPE]` can also be `all` when ``LEVELS_AND_TYPES_COMBINING_ALL`` is True, which ignores the levels and groups by sleep types. -|sumduration`[LEVEL][TYPE]` |minutes |Total duration of all `[LEVEL][TYPE]`sleep episodes. `[LEVEL]`is one of `[SLEEP_LEVELS]` (e.g. awake-classic or rem-stages) and `[TYPE]` is one of `[SLEEP_TYPES]` (e.g. main). Both `[LEVEL]` and `[TYPE]`can also be `all` when `LEVELS_AND_TYPES_COMBINING_ALL` is True, which ignores the levels and groups by sleep types. -|maxduration`[LEVEL][TYPE]` |minutes | Longest duration of any `[LEVEL][TYPE]`sleep episode. `[LEVEL]`is one of `[SLEEP_LEVELS]` (e.g. awake-classic or rem-stages) and `[TYPE]` is one of `[SLEEP_TYPES]` (e.g. main). Both `[LEVEL]` and `[TYPE]`can also be `all` when `LEVELS_AND_TYPES_COMBINING_ALL` is True, which ignores the levels and groups by sleep types. -|minduration`[LEVEL][TYPE]` |minutes | Shortest duration of any `[LEVEL][TYPE]`sleep episode. `[LEVEL]`is one of `[SLEEP_LEVELS]` (e.g. awake-classic or rem-stages) and `[TYPE]` is one of `[SLEEP_TYPES]` (e.g. main). Both `[LEVEL]` and `[TYPE]`can also be `all` when `LEVELS_AND_TYPES_COMBINING_ALL` is True, which ignores the levels and groups by sleep types. 
-|avgduration`[LEVEL][TYPE]` |minutes | Average duration of all `[LEVEL][TYPE]`sleep episodes. `[LEVEL]`is one of `[SLEEP_LEVELS]` (e.g. awake-classic or rem-stages) and `[TYPE]` is one of `[SLEEP_TYPES]` (e.g. main). Both `[LEVEL]` and `[TYPE]`can also be `all` when `LEVELS_AND_TYPES_COMBINING_ALL` is True, which ignores the levels and groups by sleep types. -|medianduration`[LEVEL][TYPE]` |minutes | Median duration of all `[LEVEL][TYPE]`sleep episodes. `[LEVEL]`is one of `[SLEEP_LEVELS]` (e.g. awake-classic or rem-stages) and `[TYPE]` is one of `[SLEEP_TYPES]` (e.g. main). Both `[LEVEL]` and `[TYPE]`can also be `all` when `LEVELS_AND_TYPES_COMBINING_ALL` is True, which ignores the levels and groups by sleep types. -|stdduration`[LEVEL][TYPE]` |minutes | Standard deviation duration of all `[LEVEL][TYPE]`sleep episodes. `[LEVEL]`is one of `[SLEEP_LEVELS]` (e.g. awake-classic or rem-stages) and `[TYPE]` is one of `[SLEEP_TYPES]` (e.g. main). Both `[LEVEL]` and `[TYPE]`can also be `all` when `LEVELS_AND_TYPES_COMBINING_ALL` is True, which ignores the levels and groups by sleep types. +|countepisode`[LEVEL][TYPE]` |episodes |Number of `[LEVEL][TYPE]`sleep episodes. `[LEVEL]`is one of `[SLEEP_LEVELS]` (e.g. awake-classic or rem-stages) and `[TYPE]` is one of `[SLEEP_TYPES]` (e.g. main). `[LEVEL]` can also be `all` when `INCLUDE_ALL_GROUPS` is True, which ignores the levels and groups by sleep types. +|sumduration`[LEVEL][TYPE]` |minutes |Total duration of all `[LEVEL][TYPE]`sleep episodes. `[LEVEL]`is one of `[SLEEP_LEVELS]` (e.g. awake-classic or rem-stages) and `[TYPE]` is one of `[SLEEP_TYPES]` (e.g. main). `[LEVEL]` can also be `all` when `INCLUDE_ALL_GROUPS` is True, which ignores the levels and groups by sleep types. +|maxduration`[LEVEL][TYPE]` |minutes | Longest duration of any `[LEVEL][TYPE]`sleep episode. `[LEVEL]`is one of `[SLEEP_LEVELS]` (e.g. awake-classic or rem-stages) and `[TYPE]` is one of `[SLEEP_TYPES]` (e.g. main). 
`[LEVEL]` can also be `all` when `INCLUDE_ALL_GROUPS` is True, which ignores the levels and groups by sleep types. +|minduration`[LEVEL][TYPE]` |minutes | Shortest duration of any `[LEVEL][TYPE]`sleep episode. `[LEVEL]`is one of `[SLEEP_LEVELS]` (e.g. awake-classic or rem-stages) and `[TYPE]` is one of `[SLEEP_TYPES]` (e.g. main). `[LEVEL]` can also be `all` when `INCLUDE_ALL_GROUPS` is True, which ignores the levels and groups by sleep types. +|avgduration`[LEVEL][TYPE]` |minutes | Average duration of all `[LEVEL][TYPE]`sleep episodes. `[LEVEL]`is one of `[SLEEP_LEVELS]` (e.g. awake-classic or rem-stages) and `[TYPE]` is one of `[SLEEP_TYPES]` (e.g. main). `[LEVEL]` can also be `all` when `INCLUDE_ALL_GROUPS` is True, which ignores the levels and groups by sleep types. +|medianduration`[LEVEL][TYPE]` |minutes | Median duration of all `[LEVEL][TYPE]`sleep episodes. `[LEVEL]`is one of `[SLEEP_LEVELS]` (e.g. awake-classic or rem-stages) and `[TYPE]` is one of `[SLEEP_TYPES]` (e.g. main). `[LEVEL]` can also be `all` when `INCLUDE_ALL_GROUPS` is True, which ignores the levels and groups by sleep types. +|stdduration`[LEVEL][TYPE]` |minutes | Standard deviation duration of all `[LEVEL][TYPE]`sleep episodes. `[LEVEL]`is one of `[SLEEP_LEVELS]` (e.g. awake-classic or rem-stages) and `[TYPE]` is one of `[SLEEP_TYPES]` (e.g. main). `[LEVEL]` can also be `all` when `INCLUDE_ALL_GROUPS` is True, which ignores the levels and groups by sleep types. Features description for `[FITBIT_SLEEP_INTRADAY][PROVIDERS][RAPIDS]` RATIOS `[ACROSS_LEVELS]`: @@ -84,22 +88,29 @@ Features description for `[FITBIT_SLEEP_INTRADAY][PROVIDERS][RAPIDS][ROUTINE]`: |Feature |Units |Description | |--------------------------------- |-------------- |-------------------------------------------------------------| -|starttimefirstmainsleep |minutes |Start time (in minutes since `REFERENCE_TIME`) of the first main sleep episode after `INCLUDE_EPISODES_LATER_THAN`. 
-|endtimelastmainsleep |minutes |End time (in minutes since `REFERENCE_TIME`) of the last main sleep episode after `INCLUDE_EPISODES_LATER_THAN`. -|starttimefirstnap |minutes |Start time (in minutes since `REFERENCE_TIME`) of the first nap episode after `INCLUDE_EPISODES_LATER_THAN`. -|endtimelastnap |minutes |End time (in minutes since `REFERENCE_TIME`) of the last nap episode after `INCLUDE_EPISODES_LATER_THAN`. +|starttimefirstmainsleep |minutes |Start time (in minutes since `ROUTINE_REFERENCE_TIME`) of the first main sleep episode after `LAST_NIGHT_END`. +|endtimelastmainsleep |minutes |End time (in minutes since `ROUTINE_REFERENCE_TIME`) of the last main sleep episode after `LAST_NIGHT_END`. +|starttimefirstnap |minutes |Start time (in minutes since `ROUTINE_REFERENCE_TIME`) of the first nap episode after `LAST_NIGHT_END`. +|endtimelastnap |minutes |End time (in minutes since `ROUTINE_REFERENCE_TIME`) of the last nap episode after `LAST_NIGHT_END`. !!! note "Assumptions/Observations" - 1. Deleting values from `[SLEEP_LEVELS]` or `[SLEEP_TYPES]` will only change the features you receive from `[LEVELS_AND_TYPES]`. For example if `STAGES` only contains `[rem, light]` you will not receive `countepisode[wake|deep][TYPE]` or sum, max, min, avg, median, or std `duration`. These values will not influence `RATIOS` or `ROUTINE` features. - 2. Any `[LEVEL]` grouping is done within the elements of each class `CLASSIC`, `STAGES`, and `UNIFIED`. That is, we never combine `CLASSIC` or `STAGES` types to compute features when `LEVELS_AND_TYPES_COMBINING_ALL` is True or when computing `RATIOS`. - + 1. [This diagram](../../img/sleep_intraday_rapids.png) will help you understand how sleep episodes are chunked and grouped within time segments and `LNE-LNE` intervals for the RAPIDS provider. + 1. Features listed in `[LEVELS_AND_TYPES]` are computed for any levels and types listed in `[SLEEP_LEVELS]` or `[SLEEP_TYPES]`. 
For example if `STAGES` only contains `[rem, light]` you will not get `countepisode[wake|deep][TYPE]` or sum, max, min, avg, median, or std `duration`. Levels or types in these lists do not influence `RATIOS` or `ROUTINE` features. + 2. Any `[LEVEL]` grouping is done within the elements of each class `CLASSIC`, `STAGES`, and `UNIFIED`. That is, we never combine `CLASSIC` or `STAGES` types to compute features. + 3. The categories for `all` levels (when `INCLUDE_ALL_GROUPS` is `True`) and `all` `SLEEP_TYPES` are not considered for `RATIOS` features as they are always 1. + 3. These features can be computed in time segments of any length, but only the 1-minute sleep chunks within each segment instance will be used. + 4. Within any time segment instance, any chunks with a local time before `LAST_NIGHT_END` will be discarded. The default `LNE` is 00:00 so no chunks are ignored. + 5. `ROUTINE_REFERENCE_TIME` influences all the `[ROUTINE]` features. If `MIDNIGHT`, the reference for these times is 00:00, if `START_OF_THE_SEGMENT`, the reference time is the start of each segment instance. ## PRICE provider +!!! hint "Understanding PRICE features" + [This diagram](../../img/sleep_intraday_price.png) will help you understand how sleep episodes are chunked and grouped within time segments and `LNE-LNE` intervals for the PRICE provider. + !!! info "Available time segments" - Available for any time segments larger or equal to one day @@ -120,94 +131,40 @@ Parameters description for `[FITBIT_SLEEP_INTRADAY][PROVIDERS][PRICE]`: |----------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |`[COMPUTE]` | Set to `True` to extract `FITBIT_SLEEP_INTRADAY` features from the `PRICE` provider | |`[FEATURES]` | Features to be computed from sleep intraday data, see table below -|`[SLEEP_LEVELS]` | Fitbit’s sleep API Version 1 only provides `CLASSIC` records. 
However, Version 1.2 provides 2 types of records: `CLASSIC` and `STAGES`. `STAGES` is only available in devices with a heart rate sensor and even those devices will fail to report it if the battery is low or the device is not tight enough. While `CLASSIC` contains 3 sleep levels (`awake`, `restless`, and `asleep`), `STAGES` contains 4 sleep levels (`wake`, `deep`, `light`, `rem`). To make it consistent, RAPIDS grouped them into 2 `UNIFIED` sleep levels: `awake` (`CLASSIC`: `awake` and `restless`; `STAGES`: `wake`) and `asleep` (`CLASSIC`: `asleep`; `STAGES`: `deep`, `light`, and `rem`). +|`[SLEEP_LEVELS]` | Fitbit’s sleep API Version 1 only provides `CLASSIC` records. However, Version 1.2 provides 2 types of records: `CLASSIC` and `STAGES`. `STAGES` is only available in devices with a heart rate sensor and even those devices will fail to report it if the battery is low or the device is not tight enough. While `CLASSIC` contains 3 sleep levels (`awake`, `restless`, and `asleep`), `STAGES` contains 4 sleep levels (`wake`, `deep`, `light`, `rem`). To make it consistent, RAPIDS groups them into 2 `UNIFIED` sleep levels: `awake` (`CLASSIC`: `awake` and `restless`; `STAGES`: `wake`) and `asleep` (`CLASSIC`: `asleep`; `STAGES`: `deep`, `light`, and `rem`). In this section, there is a boolean flag named `INCLUDE_ALL_GROUPS` that if set to TRUE, computes avgdurationallmain`[DAY_TYPE]` features grouping all levels together in a single `all` category. |`[DAY_TYPE]` | The features of this provider can be computed using daily averages/standard deviations that were extracted on `WEEKEND` days only, `WEEK` days only, or `ALL` days| -|`[GROUP_EPISODES_WITHIN]` | This parameter contains 2 values: `[START_TIME]` and `[LENGTH]`. Only `main` sleep episodes that intersect or contain the period between [`START_TIME`, `START_TIME` + `LENGTH`] are taken into account to compute the features described below. Both `[START_TIME]` and `[LENGTH]` are in minutes. 
`[START_TIME]` is a number ranging from 0 (midnight) to 1439 (23:59) which denotes the number of minutes after midnight. `[LENGTH]` is a number smaller than 1440 (24 hours). | +|`[LAST_NIGHT_END]` | Only `main` sleep episodes that start within the `LNE-LNE` interval [`LAST_NIGHT_END`, `LAST_NIGHT_END` + 23H 59M 59S] are taken into account to compute the features described below. `[LAST_NIGHT_END]` is a number ranging from 0 (midnight) to 1439 (23:59). | Features description for `[FITBIT_SLEEP_INTRADAY][PROVIDERS][PRICE]`: |Feature                                                             |Units |Description | |------------------------------------- |----------------- |-------------------------------------------------------------| -|avgduration`[LEVEL]`main`[DAY_TYPE]` |minutes | Average duration of daily `LEVEL` sleep episodes. You can include daily average that were computed on weekend days, week days or both depending on the value of the `DAY_TYPE` flag. -|avgratioduration`[LEVEL]`withinmain`[DAY_TYPE]` |- | Average ratio between daily `LEVEL` time and in-bed time inferred from `main` sleep episodes. `LEVEL` is one of `SLEEP_LEVELS` (e.g. awake-classic or rem-stages). In-bed time is the total duration of all `main` sleep episodes for each day. You can include daily ratios that were computed on weekend days, week days or both depending on the value of the `DAY_TYPE` flag. -|avgstarttimeofepisodemain`[DAY_TYPE]` |minutes | Average start time of the first `main` sleep episode of each day in a time segment. You can include daily start times from episodes detected on weekend days, week days or both depending on the value of the `DAY_TYPE` flag. -|avgendtimeofepisodemain`[DAY_TYPE]` |minutes | Average end time of the last `main` sleep episode of each day in a time segment. You can include daily end times from episodes detected on weekend days, week days or both depending on the value of the `DAY_TYPE` flag. 
-|avgmidpointofepisodemain`[DAY_TYPE]` |minutes | Average mid time between the start of the first `main` sleep episode and the end of the last `main` sleep episode of each day in a time segment. You can include episodes detected on weekend days, week days or both depending on the value of the `DAY_TYPE` flag. -|stdstarttimeofepisodemain`[DAY_TYPE]` |minutes | Standard deviation of start time of the first `main` sleep episode of each day in a time segment. You can include daily start times from episodes detected on weekend days, week days or both depending on the value of the `DAY_TYPE` flag. -|stdendtimeofepisodemain`[DAY_TYPE]` |minutes | Standard deviation of end time of the last `main` sleep episode of each day in a time segment. You can include daily end times from episodes detected on weekend days, week days or both depending on the value of the `DAY_TYPE` flag. -|stdmidpointofepisodemain`[DAY_TYPE]` |minutes | Standard deviation of mid time between the start of the first `main` sleep episode and the end of the last `main` sleep episode of each day in a time segment. You can include episodes detected on weekend days, week days or both depending on the value of the `DAY_TYPE` flag. -|socialjetlag |minutes | Difference in minutes between the avgmidpointofepisodemain (average mid time between bedtime and wake time) of weekends and weekdays. -|meanssdstarttimeofepisodemain |minutes squared | Same as `avgstarttimeofepisodemain[DAY_TYPE]` but the average is computed over the squared differences of each pair of consecutive start times. -|meanssdendtimeofepisodemain |minutes squared | Same as `avgendtimeofepisodemain[DAY_TYPE]` but the average is computed over the squared differences of each pair of consecutive end times. -|meanssdmidpointofepisodemain |minutes squared | Same as `avgmidpointofepisodemain[DAY_TYPE]` but the average is computed over the squared differences of each pair of consecutive mid times. 
-|medianssdstarttimeofepisodemain |minutes squared | Same as `avgstarttimeofepisodemain[DAY_TYPE]` but the median is computed over the squared differences of each pair of consecutive start times. -|medianssdendtimeofepisodemain |minutes squared | Same as `avgendtimeofepisodemain[DAY_TYPE]` but the median is computed over the squared differences of each pair of consecutive end times. -|medianssdmidpointofepisodemain |minutes squared | Same as `avgmidpointofepisodemain[DAY_TYPE]` but the median is computed over the squared differences of each pair of consecutive mid times. +|avgduration`[LEVEL]`main`[DAY_TYPE]` |minutes | Average duration of daily sleep chunks of a `LEVEL`. Use the `DAY_TYPE` flag to include daily durations from weekend days only, weekdays, or both. Use `[LEVEL]` to group all levels in a single `all` category. +|avgratioduration`[LEVEL]`withinmain`[DAY_TYPE]` |- | Average of the daily ratio between the duration of sleep chunks of a `LEVEL` and total duration of all `main` sleep episodes in a day. When `INCLUDE_ALL_GROUPS` is `True` the `all` `LEVEL` is ignored since this feature is always 1. Use the `DAY_TYPE` flag to include start times from weekend days only, weekdays, or both. +|avgstarttimeofepisodemain`[DAY_TYPE]` |minutes | Average of all start times of the first `main` sleep episode within each `LNE-LNE` interval in a time segment. Use the `DAY_TYPE` flag to include start times from `LNE-LNE` intervals that start on weekend days only, weekdays, or both. +|avgendtimeofepisodemain`[DAY_TYPE]` |minutes | Average of all end times of the last `main` sleep episode within each `LNE-LNE` interval in a time segment. Use the `DAY_TYPE` flag to include end times from `LNE-LNE` intervals that start on weekend days only, weekdays, or both. +|avgmidpointofepisodemain`[DAY_TYPE]` |minutes | Average of all the differences between `avgendtime...` and `avgstarttime..` in a time segment. 
Use the `DAY_TYPE` flag to include end times from `LNE-LNE` intervals that start on weekend days only, weekdays, or both. +|stdstarttimeofepisodemain`[DAY_TYPE]` |minutes | Standard deviation of all start times of the first `main` sleep episode within each `LNE-LNE` interval in a time segment. Use the `DAY_TYPE` flag to include start times from `LNE-LNE` intervals that start on weekend days only, weekdays, or both. +|stdendtimeofepisodemain`[DAY_TYPE]` |minutes | Standard deviation of all end times of the last `main` sleep episode within each `LNE-LNE` interval in a time segment. Use the `DAY_TYPE` flag to include end times from `LNE-LNE` intervals that start on weekend days only, weekdays, or both. +|stdmidpointofepisodemain`[DAY_TYPE]` |minutes | Standard deviation of all the differences between `avgendtime...` and `avgstarttime..` in a time segment. Use the `DAY_TYPE` flag to include end times from `LNE-LNE` intervals that start on weekend days only, weekdays, or both. +|socialjetlag |minutes | Difference in minutes between the avgmidpointofepisodemain of weekends and weekdays that belong to each time segment instance. If your time segment does not contain at least one week day and one weekend day this feature will be NA. +|rmssdmeanstarttimeofepisodemain |minutes | Square root of the **mean** squared successive difference (RMSSD) between today's and yesterday's `starttimeofepisodemain` values across the entire participant's sleep data grouped per time segment instance. It represents the mean of how someone's `starttimeofepisodemain` (bedtime) changed from night to night. +|rmssdmeanendtimeofepisodemain |minutes | Square root of the **mean** squared successive difference (RMSSD) between today's and yesterday's `endtimeofepisodemain` values across the entire participant's sleep data grouped per time segment instance. It represents the mean of how someone's `endtimeofepisodemain` (wake time) changed from night to night. 
+|rmssdmeanmidpointofepisodemain |minutes | Square root of the **mean** squared successive difference (RMSSD) between today's and yesterday's `midpointofepisodemain` values across the entire participant's sleep data grouped per time segment instance. It represents the mean of how someone's `midpointofepisodemain` (mid time between bedtime and wake time) changed from night to night. +|rmssdmedianstarttimeofepisodemain |minutes | Square root of the **median** squared successive difference (RMSSD) between today's and yesterday's `starttimeofepisodemain` values across the entire participant's sleep data grouped per time segment instance. It represents the median of how someone's `starttimeofepisodemain` (bedtime) changed from night to night. +|rmssdmedianendtimeofepisodemain |minutes | Square root of the **median** squared successive difference (RMSSD) between today's and yesterday's `endtimeofepisodemain` values across the entire participant's sleep data grouped per time segment instance. It represents the median of how someone's `endtimeofepisodemain` (wake time) changed from night to night. +|rmssdmedianmidpointofepisodemain |minutes | Square root of the **median** squared successive difference (RMSSD) between today's and yesterday's `midpointofepisodemain` values across the entire participant's sleep data grouped per time segment instance. It represents the median of how someone's `midpointofepisodemain` (average mid time between bedtime and wake time) changed from night to night. !!! note "Assumptions/Observations" - 1. These features are based on descriptive statistics computed across daily values (start/end/mid times of sleep episodes). This is the reason why they are only available on time segments that are longer than 24 hours (we need at least 1 day to get the average). + 1. [This diagram](../../img/sleep_intraday_price.png) will help you understand how sleep episodes are chunked and grouped within time segments and `LNE-LNE` intervals for the PRICE provider. 
+ 1. We recommend you use periodic segments that start in the morning so RAPIDS can chunk and group sleep episodes overnight. Shifted segments (as any other segments) are labelled based on their start and end date times. + 5. `avgstarttime...` and `avgendtime...` are roughly equivalent to an average bed and awake time only if you are using shifted segments. + 1. The features of this provider are only available on time segments that are longer than 24 hours because they are based on descriptive statistics computed across daily values. 2. Even though Fitbit provides 2 types of sleep episodes (`main` and `nap`), only `main` sleep episodes are considered. - 3. How do we assign sleep episodes to specific dates? - - `START_TIME` and `LENGTH` control the dates that sleep episodes belong to. For a pair of `[START_TIME]` and `[LENGTH]`, sleep episodes (blue boxes) can only be placed at the following places: - -
- -
Relationship between sleep episodes and the given times`([START_TIME], [LENGTH])`
-
- - - If the end time of a sleep episode is before `[START_TIME]`, it will belong to the day before its start date (e.g. sleep episode #1). - - - if (1) the start time or the end time of a sleep episode are between (overlap) `[START_TIME]` and `[START_TIME] + [LENGTH]` or (2) the start time is before `[START_TIME]` and the end time is after `[START_TIME] + [LENGTH]`, it will belong to its start date (e.g. sleep episode #2, #3, #4, #5). - - - If the start time of a sleep episode is after `START_TIME] + [LENGTH]`, it will belong to the day after its start date (e.g. sleep episode #6). - - Only `main` sleep episodes that intersect or contain the period between `[START_TIME]` and `[START_TIME] + [LENGTH]` will be included in the feature computation. If we process the following `main` sleep episodes: - - | episode |start|end| - |-|-|-| - |1|2021-02-01 12:00|2021-02-01 15:00| - |2|2021-02-01 21:00|2021-02-02 03:00|02-01 - |3|2021-02-02 05:00|2021-02-02 08:00|02-01 - |4|2021-02-02 11:00|2021-02-02 14:00| - |5|2021-02-02 19:00|2021-02-03 06:00|02-02 - - And our parameters: - - - `[INCLUDE_EPISODES_INTERSECTING][START_TIME]` = 1320 (today's 22:00) - - - `[INCLUDE_EPISODES_INTERSECTING][LENGTH]` = 720 (tomorrow's 10:00, or 22:00 + 12 hours) - - Only sleep episodes 2, 3,and 5 would be considered. - - 4. Time related features represent the number of minutes between the start/end/midpoint of sleep episodes and the assigned day's midnight. - - 5. All `main` sleep episodes are chunked within the requested [time segments](../../setup/configuration/#time-segments) which need to be at least 24 hours or more long (1, 2, 3, 7 days, etc.). 
Then, daily features will be extracted and averaged across the length of the time segment, for example: - - The daily features extracted on 2021-02-01 will be: - - - starttimeofepisodemain (bedtime) is `21 * 60` (episode 2 start time 2021-02-01 21:00) - - - endtimeofepisodemain (wake time) is `32 * 60 `(episode 3 end time 2021-02-02 08:00 + 24) - - - midpointofepisodemain (midpoint sleep) is `[(21 * 60) + (32 * 60)] / 2` - - - The daily features extracted on 2021-02-02 will be: - - - starttimeofepisodemain (bedtime) is `19 * 60` (episode 5 start time 2021-02-01 19:00) - - - endtimeofepisodemain (wake time) is `30 * 60 `(episode 5 end time 2021-02-03 06:00 + 24) - - - midpointofepisodemain (midpoint sleep) is `[(19 * 60) + (30 * 60)] / 2` - - And `avgstarttimeofepisodemain[DAY_TYPE]` will be `([21 * 60] + [19 * 60]) / 2` - - - - - - + 4. The reference point for all times is 00:00 of the first day in the LNE-LNE interval. + 5. Sleep episodes are formed by 1-minute chunks that we group overnight starting from today’s LNE and ending on tomorrow’s LNE or the end of that segment (whatever is first). + 5. The features `avgstarttime...` and `avgendtime...` are the average of the first and last sleep episode across every LNE-LNE interval within a segment (`avgmidtime...` is the mid point between start and end). Therefore, only segments longer than 24hrs will be averaged across more than one LNE-LNE interval. + 5. `socialjetlag` is only available on segment instances equal or longer than 48hrs that contain at least one weekday day and one weekend day, for example seven-day (weekly) segments. diff --git a/docs/features/fitbit-sleep-summary.md b/docs/features/fitbit-sleep-summary.md index 515cb6d8..060e61e7 100644 --- a/docs/features/fitbit-sleep-summary.md +++ b/docs/features/fitbit-sleep-summary.md @@ -9,6 +9,9 @@ Sensor parameters description for `[FITBIT_SLEEP_SUMMARY]`: ## RAPIDS provider +!!! 
hint "Understanding RAPIDS features" + [This diagram](../../img/sleep_summary_rapids.png) will help you understand how sleep episodes are chunked and grouped within time segments using `SLEEP_SUMMARY_LAST_NIGHT_END` for the RAPIDS provider. + !!! info "Available time segments" - Only available for segments that span 1 or more complete days (e.g. Jan 1st 00:00 to Jan 3rd 23:59) @@ -26,14 +29,19 @@ Parameters description for `[FITBIT_SLEEP_SUMMARY][PROVIDERS][RAPIDS]`: |Key                              | Description | |----------------|----------------------------------------------------------------------------------------------------------------------------------- |`[COMPUTE]` | Set to `True` to extract `FITBIT_SLEEP_SUMMARY` features from the `RAPIDS` provider | -|`[SLEEP_TYPES]` | Types of sleep to be included in the feature extraction computation. Fitbit provides 3 types of sleep: `main`, `nap`, `all`. | +|`[SLEEP_TYPES]` | Types of sleep to be included in the feature extraction computation. There are three sleep types: `main`, `nap`, and `all`. The `all` type means both main sleep and naps are considered. | |`[FEATURES]` | Features to be computed from sleep summary data, see table below | +|`[FITBIT_DATA_STREAMS][data stream][SLEEP_SUMMARY_LAST_NIGHT_END]` | As an exception, the `LAST_NIGHT_END` parameter for this provider is in the data stream configuration section. This parameter controls how sleep episodes are assigned to different days and affects wake and bedtimes.| Features description for `[FITBIT_SLEEP_SUMMARY][PROVIDERS][RAPIDS]`: |Feature |Units |Description | |------------------------------ |---------- |-------------------------------------------- | +|firstwaketimeTYPE |minutes |First wake time for a certain sleep type during a time segment. Wake time is number of minutes after midnight of a sleep episode's end time. +|lastwaketimeTYPE |minutes |Last wake time for a certain sleep type during a time segment. 
Wake time is number of minutes after midnight of a sleep episode's end time. +|firstbedtimeTYPE |minutes |First bedtime for a certain sleep type during a time segment. Bedtime is number of minutes after midnight of a sleep episode's start time. +|lastbedtimeTYPE |minutes |Last bedtime for a certain sleep type during a time segment. Bedtime is number of minutes after midnight of a sleep episode's start time. |countepisodeTYPE |episodes |Number of sleep episodes for a certain sleep type during a time segment. |avgefficiencyTYPE |scores |Average sleep efficiency for a certain sleep type during a time segment. |sumdurationafterwakeupTYPE |minutes |Total duration the user stayed in bed after waking up for a certain sleep type during a time segment. @@ -50,10 +58,13 @@ Features description for `[FITBIT_SLEEP_SUMMARY][PROVIDERS][RAPIDS]`: !!! note "Assumptions/Observations" - - 1. There are three sleep types (TYPE): `main`, `nap`, `all`. The `all` type contains both main sleep and naps. - + 1. [This diagram](../../img/sleep_summary_rapids.png) will help you understand how sleep episodes are chunked and grouped within time segments using `LNE` for the RAPIDS provider. + 1. There are three sleep types (TYPE): `main`, `nap`, `all`. The `all` type groups both `main` sleep and `naps`. All types are based on Fitbit's labels. 2. There are two versions of Fitbit’s sleep API ([version 1](https://dev.fitbit.com/build/reference/web-api/sleep-v1/) and [version 1.2](https://dev.fitbit.com/build/reference/web-api/sleep/)), and each provides raw sleep data in a different format: - _Count & duration summaries_. `v1` contains `count_awake`, `duration_awake`, `count_awakenings`, `count_restless`, and `duration_restless` fields for every sleep record but `v1.2` does not. - - 3. _API columns_. 
Features are computed based on the values provided by Fitbit’s API: `efficiency`, `minutes_after_wakeup`, `minutes_asleep`, `minutes_awake`, `minutes_to_fall_asleep`, `minutes_in_bed`, `is_main_sleep` and `type`. \ No newline at end of file + 3. _API columns_. Most features are computed based on the values provided by Fitbit’s API: `efficiency`, `minutes_after_wakeup`, `minutes_asleep`, `minutes_awake`, `minutes_to_fall_asleep`, `minutes_in_bed`, `is_main_sleep` and `type`. + 4. Bed time and sleep duration are based on episodes that started between today’s LNE and tomorrow’s LNE while awake time is based on the episodes that started between yesterday’s LNE and today’s LNE + 5. The reference point for bed/awake times is today’s 00:00. You can have bedtimes larger than 24 and awake times smaller than 0 + 6. These features are only available for time segments that span midnight to midnight of the same or different day. + 7. We include first and last wake and bedtimes because, when `LAST_NIGHT_END` is 10 am, the first bedtime could match a nap at 2 pm, and the last bedtime could match a main overnight sleep episode that starts at 10pm. + 5. Set the value for `SLEEP_SUMMARY_LAST_NIGHT_END` in the config parameter [FITBIT_DATA_STREAMS][data stream][SLEEP_SUMMARY_LAST_NIGHT_END]. 
\ No newline at end of file diff --git a/docs/img/sleep_intraday_price.png b/docs/img/sleep_intraday_price.png new file mode 100644 index 00000000..5b715e41 Binary files /dev/null and b/docs/img/sleep_intraday_price.png differ diff --git a/docs/img/sleep_intraday_rapids.png b/docs/img/sleep_intraday_rapids.png new file mode 100644 index 00000000..170d6e68 Binary files /dev/null and b/docs/img/sleep_intraday_rapids.png differ diff --git a/docs/img/sleep_summary_rapids.png b/docs/img/sleep_summary_rapids.png new file mode 100644 index 00000000..ab0f4a92 Binary files /dev/null and b/docs/img/sleep_summary_rapids.png differ diff --git a/docs/setup/configuration.md b/docs/setup/configuration.md index 6aabd88d..fefa9708 100644 --- a/docs/setup/configuration.md +++ b/docs/setup/configuration.md @@ -500,19 +500,19 @@ Modify the following keys in your `config.yaml` depending on the [data stream](. # AVAILABLE: fitbitjson_mysql: DATABASE_GROUP: MY_GROUP - SLEEP_SUMMARY_EPISODE_DAY_ANCHOR: False + SLEEP_SUMMARY_LAST_NIGHT_END: 660 fitbitjson_csv: FOLDER: data/external/fitbit_csv - SLEEP_SUMMARY_EPISODE_DAY_ANCHOR: False + SLEEP_SUMMARY_LAST_NIGHT_END: 660 fitbitparsed_mysql: DATABASE_GROUP: MY_GROUP - SLEEP_SUMMARY_EPISODE_DAY_ANCHOR: False + SLEEP_SUMMARY_LAST_NIGHT_END: 660 fitbitparsed_csv: FOLDER: data/external/fitbit_csv - SLEEP_SUMMARY_EPISODE_DAY_ANCHOR: False + SLEEP_SUMMARY_LAST_NIGHT_END: 660 ``` @@ -524,7 +524,7 @@ Modify the following keys in your `config.yaml` depending on the [data stream](. | Key | Description | |---------------------|----------------------------------------------------------------------------------------------------------------------------| | `[DATABASE_GROUP]` | A database credentials group. Read the instructions below to set it up | - | `[SLEEP_SUMMARY_EPISODE_DAY_ANCHOR]` | One of `start` or `end`. 
Summary sleep episodes are considered as events based on either the start timestamp or end timestamp (they will belong to the day where they start or end). | + | `[SLEEP_SUMMARY_LAST_NIGHT_END]` | Segments are assigned based on this parameter. Any sleep episode that starts between today's SLEEP_SUMMARY_LAST_NIGHT_END (LNE) and tomorrow's LNE is regarded as today's sleep episode. While today's bedtime is based on today's sleep episodes, today's wake time is based on yesterday's sleep episodes. | --8<---- "docs/snippets/database.md" @@ -535,7 +535,7 @@ Modify the following keys in your `config.yaml` depending on the [data stream](. | Key | Description | |---------------------|----------------------------------------------------------------------------------------------------------------------------| | `[FOLDER]` | Folder where you have to place a CSV file **per** Fitbit sensor. Each file has to contain all the data from every participant you want to process. | - | `[SLEEP_SUMMARY_EPISODE_DAY_ANCHOR]` | One of `start` or `end`. Summary sleep episodes are considered as events based on either the start timestamp or end timestamp (they will belong to the day where they start or end). | + | `[SLEEP_SUMMARY_LAST_NIGHT_END]` | Segments are assigned based on this parameter. Any sleep episode that starts between today's SLEEP_SUMMARY_LAST_NIGHT_END (LNE) and tomorrow's LNE is regarded as today's sleep episode. While today's bedtime is based on today's sleep episodes, today's wake time is based on yesterday's sleep episodes. | === "fitbitparsed_mysql" @@ -546,7 +546,7 @@ Modify the following keys in your `config.yaml` depending on the [data stream](. | Key | Description | |---------------------|----------------------------------------------------------------------------------------------------------------------------| | `[DATABASE_GROUP]` | A database credentials group. Read the instructions below to set it up | - | `[SLEEP_SUMMARY_EPISODE_DAY_ANCHOR]` | One of `start` or `end`. 
Summary sleep episodes are considered as events based on either the start timestamp or end timestamp (they will belong to the day where they start or end). | + | `[SLEEP_SUMMARY_LAST_NIGHT_END]` | Segments are assigned based on this parameter. Any sleep episode that starts between today's SLEEP_SUMMARY_LAST_NIGHT_END (LNE) and tomorrow's LNE is regarded as today's sleep episode. While today's bedtime is based on today's sleep episodes, today's wake time is based on yesterday's sleep episodes. | --8<---- "docs/snippets/database.md" @@ -557,7 +557,7 @@ Modify the following keys in your `config.yaml` depending on the [data stream](. | Key | Description | |---------------------|----------------------------------------------------------------------------------------------------------------------------| | `[FOLDER]` | Folder where you have to place a CSV file **per** Fitbit sensor. Each file has to contain all the data from every participant you want to process. | - | `[SLEEP_SUMMARY_EPISODE_DAY_ANCHOR]` | One of `start` or `end`. Summary sleep episodes are considered as events based on either the start timestamp or end timestamp (they will belong to the day where they start or end). | + | `[SLEEP_SUMMARY_LAST_NIGHT_END]` | Segments are assigned based on this parameter. Any sleep episode that starts between today's SLEEP_SUMMARY_LAST_NIGHT_END (LNE) and tomorrow's LNE is regarded as today's sleep episode. While today's bedtime is based on today's sleep episodes, today's wake time is based on yesterday's sleep episodes. 
| === "Empatica" diff --git a/src/data/streams/mutations/fitbit/parse_sleep_summary_json.py b/src/data/streams/mutations/fitbit/parse_sleep_summary_json.py index 33d7ef50..34a0c4b8 100644 --- a/src/data/streams/mutations/fitbit/parse_sleep_summary_json.py +++ b/src/data/streams/mutations/fitbit/parse_sleep_summary_json.py @@ -58,15 +58,18 @@ def parseSleepData(sleep_data): def main(json_raw, stream_parameters): parsed_data = parseSleepData(json_raw) + parsed_data["local_date_time"] = (parsed_data["local_start_date_time"] - pd.Timedelta(minutes=stream_parameters["SLEEP_SUMMARY_LAST_NIGHT_END"])).dt.strftime('%Y-%m-%d 00:00:00') + + # complete missing dates + missed_dates = list(set([x.strftime('%Y-%m-%d 00:00:00') for x in pd.date_range(parsed_data["local_date_time"].min(), parsed_data["local_date_time"].max()).to_pydatetime()]) - set(parsed_data["local_date_time"])) + parsed_data = pd.concat([parsed_data, pd.DataFrame({"local_date_time": missed_dates})], axis=0) + parsed_data.sort_values(by=["local_date_time", "local_start_date_time"], inplace=True) + parsed_data["device_id"] = parsed_data["device_id"].interpolate(method="pad") + parsed_data["timestamp"] = 0 # this column is added at readable_datetime.R because we neeed to take into account multiple timezones if pd.api.types.is_datetime64_any_dtype( parsed_data['local_start_date_time']): parsed_data['local_start_date_time'] = parsed_data['local_start_date_time'].dt.strftime('%Y-%m-%d %H:%M:%S') if pd.api.types.is_datetime64_any_dtype( parsed_data['local_end_date_time']): parsed_data['local_end_date_time'] = parsed_data['local_end_date_time'].dt.strftime('%Y-%m-%d %H:%M:%S') - if stream_parameters["SLEEP_SUMMARY_EPISODE_DAY_ANCHOR"] == "start": - parsed_data["local_date_time"] = parsed_data['local_start_date_time'] - else: - parsed_data["local_date_time"] = parsed_data['local_end_date_time'] - return(parsed_data) diff --git a/src/data/streams/pull_wearable_data.R b/src/data/streams/pull_wearable_data.R index 
dcc43c8f..9cf8e6cf 100644 --- a/src/data/streams/pull_wearable_data.R +++ b/src/data/streams/pull_wearable_data.R @@ -3,6 +3,15 @@ source("renv/activate.R") library(yaml) library(dplyr) library(readr) + +fix_pandas_nan_in_string_columns <- function(column){ + return(vapply(column, function(value) { + if(!is.character(value) && !is.nan(value)) + stop("The reticulate conversion from the python mutation script to r failed. One or more returned columns are a list with unsupported mixed types. We only handle string columns with np.nan values. Open a GitHub issue or fix the mutation script") + return(ifelse(is.nan(value), NA_character_, value)) + }, FUN.VALUE = character(1))) +} + # we use reticulate but only load it if we are going to use it to minimize the case when old RAPIDS deployments need to update ther renv mutate_data <- function(scripts, data, data_configuration){ for(script in scripts){ @@ -25,6 +34,7 @@ mutate_data <- function(scripts, data, data_configuration){ if(py_has_attr(script_functions, "main")){ message(paste("Applying mutation script", script)) data <- script_functions$main(data, data_configuration) + data <- data %>% mutate(across(where(is.list), fix_pandas_nan_in_string_columns)) } else{ stop(paste0("The following mutation script does not have a main function: ", script)) } diff --git a/src/features/fitbit_sleep_intraday/price/main.py b/src/features/fitbit_sleep_intraday/price/main.py index 2d602305..05dc52e1 100644 --- a/src/features/fitbit_sleep_intraday/price/main.py +++ b/src/features/fitbit_sleep_intraday/price/main.py @@ -3,7 +3,7 @@ import itertools -def featuresFullNames(intraday_features_to_compute, sleep_levels_to_compute, day_types_to_compute): +def featuresFullNames(intraday_features_to_compute, sleep_levels_to_compute, day_types_to_compute, levels_include_all_groups): features_fullnames = ["local_segment"] @@ -14,7 +14,7 @@ def featuresFullNames(intraday_features_to_compute, sleep_levels_to_compute, day for feature in 
intraday_features_to_compute: if feature == "avgduration": - features_fullnames.extend(["avgduration" + x[0] + "main" + x[1].lower() for x in itertools.product(sleep_level_with_group, day_types_to_compute)]) + features_fullnames.extend(["avgduration" + x[0] + "main" + x[1].lower() for x in itertools.product(sleep_level_with_group + (["all"] if levels_include_all_groups else []), day_types_to_compute)]) elif feature == "avgratioduration": features_fullnames.extend(["avgratioduration" + x[0] + "withinmain" + x[1].lower() for x in itertools.product(sleep_level_with_group, day_types_to_compute)]) elif feature in ["avgstarttimeofepisodemain", "avgendtimeofepisodemain", "avgmidpointofepisodemain", "stdstarttimeofepisodemain", "stdendtimeofepisodemain", "stdmidpointofepisodemain"]: @@ -69,7 +69,7 @@ def extractDailyFeatures(sleep_data): return daily_features -def statsOfDailyFeatures(daily_features, day_type, sleep_levels, intraday_features_to_compute, sleep_intraday_features): +def statsOfDailyFeatures(daily_features, day_type, sleep_levels, intraday_features_to_compute, sleep_intraday_features, levels_include_all_groups): if day_type == "WEEKEND": daily_features = daily_features[daily_features["is_weekend"] == 0] elif day_type == "WEEK": @@ -110,6 +110,8 @@ def statsOfDailyFeatures(daily_features, day_type, sleep_levels, intraday_featur if "avgratioduration" in intraday_features_to_compute: col = "ratioduration" + sleep_level + sleep_level_group.lower() + "withinmain" sleep_intraday_features = pd.concat([sleep_intraday_features, daily_features[["local_segment", col]].groupby("local_segment")[col].mean().to_frame().rename(columns={col: "avg" + col + day_type.lower()})], axis=1) + if levels_include_all_groups and ("avgduration" in intraday_features_to_compute): + sleep_intraday_features = pd.concat([sleep_intraday_features, daily_features[["local_segment", 
"durationinbedmain"]].groupby("local_segment")["durationinbedmain"].mean().to_frame().rename(columns={"durationinbedmain": "avgdurationallmain" + day_type.lower()})], axis=1) return sleep_intraday_features @@ -127,28 +129,28 @@ def socialJetLagFeature(daily_features, sleep_intraday_features): return sleep_intraday_features -def MSSDFeatures(daily_features, intraday_features_to_compute, sleep_intraday_features): +def RMSSDFeatures(daily_features, intraday_features_to_compute, sleep_intraday_features): date_idx = pd.DataFrame(pd.date_range(start=daily_features["fake_date"].min(), end=daily_features["fake_date"].max(), freq="D"), columns=["fake_date"]) date_idx["fake_date"] = date_idx["fake_date"].dt.date daily_features = daily_features.merge(date_idx, on="fake_date", how="right") for col in ["starttimeofepisodemain", "endtimeofepisodemain", "midpointofepisodemain"]: - daily_features[col + "_diff"] = daily_features[col].diff() + daily_features[col + "_diff"] = daily_features[col].diff().pow(2) - if "meanssdstarttimeofepisodemain" in intraday_features_to_compute: - sleep_intraday_features = pd.concat([sleep_intraday_features, daily_features[["local_segment","starttimeofepisodemain_diff"]].groupby("local_segment")["starttimeofepisodemain_diff"].mean().to_frame().rename(columns={"starttimeofepisodemain_diff": "meanssdstarttimeofepisodemain"})], axis=1) - if "meanssdendtimeofepisodemain" in intraday_features_to_compute: - sleep_intraday_features = pd.concat([sleep_intraday_features, daily_features[["local_segment","endtimeofepisodemain_diff"]].groupby("local_segment")["endtimeofepisodemain_diff"].mean().to_frame().rename(columns={"endtimeofepisodemain_diff": "meanssdendtimeofepisodemain"})], axis=1) - if "meanssdmidpointofepisodemain" in intraday_features_to_compute: - sleep_intraday_features = pd.concat([sleep_intraday_features, 
daily_features[["local_segment","midpointofepisodemain_diff"]].groupby("local_segment")["midpointofepisodemain_diff"].mean().to_frame().rename(columns={"midpointofepisodemain_diff": "meanssdmidpointofepisodemain"})], axis=1) + if "rmssdmeanstarttimeofepisodemain" in intraday_features_to_compute: + sleep_intraday_features = pd.concat([sleep_intraday_features, daily_features[["local_segment","starttimeofepisodemain_diff"]].groupby("local_segment")["starttimeofepisodemain_diff"].mean().pow(0.5).to_frame().rename(columns={"starttimeofepisodemain_diff": "rmssdmeanstarttimeofepisodemain"})], axis=1) + if "rmssdmeanendtimeofepisodemain" in intraday_features_to_compute: + sleep_intraday_features = pd.concat([sleep_intraday_features, daily_features[["local_segment","endtimeofepisodemain_diff"]].groupby("local_segment")["endtimeofepisodemain_diff"].mean().pow(0.5).to_frame().rename(columns={"endtimeofepisodemain_diff": "rmssdmeanendtimeofepisodemain"})], axis=1) + if "rmssdmeanmidpointofepisodemain" in intraday_features_to_compute: + sleep_intraday_features = pd.concat([sleep_intraday_features, daily_features[["local_segment","midpointofepisodemain_diff"]].groupby("local_segment")["midpointofepisodemain_diff"].mean().pow(0.5).to_frame().rename(columns={"midpointofepisodemain_diff": "rmssdmeanmidpointofepisodemain"})], axis=1) - if "medianssdstarttimeofepisodemain" in intraday_features_to_compute: - sleep_intraday_features = pd.concat([sleep_intraday_features, daily_features[["local_segment","starttimeofepisodemain_diff"]].groupby("local_segment")["starttimeofepisodemain_diff"].median().to_frame().rename(columns={"starttimeofepisodemain_diff": "medianssdstarttimeofepisodemain"})], axis=1) - if "medianssdendtimeofepisodemain" in intraday_features_to_compute: - sleep_intraday_features = pd.concat([sleep_intraday_features, 
daily_features[["local_segment","endtimeofepisodemain_diff"]].groupby("local_segment")["endtimeofepisodemain_diff"].median().to_frame().rename(columns={"endtimeofepisodemain_diff": "medianssdendtimeofepisodemain"})], axis=1) - if "medianssdmidpointofepisodemain" in intraday_features_to_compute: - sleep_intraday_features = pd.concat([sleep_intraday_features, daily_features[["local_segment","midpointofepisodemain_diff"]].groupby("local_segment")["midpointofepisodemain_diff"].median().to_frame().rename(columns={"midpointofepisodemain_diff": "medianssdmidpointofepisodemain"})], axis=1) + if "rmssdmedianstarttimeofepisodemain" in intraday_features_to_compute: + sleep_intraday_features = pd.concat([sleep_intraday_features, daily_features[["local_segment","starttimeofepisodemain_diff"]].groupby("local_segment")["starttimeofepisodemain_diff"].median().pow(0.5).to_frame().rename(columns={"starttimeofepisodemain_diff": "rmssdmedianstarttimeofepisodemain"})], axis=1) + if "rmssdmedianendtimeofepisodemain" in intraday_features_to_compute: + sleep_intraday_features = pd.concat([sleep_intraday_features, daily_features[["local_segment","endtimeofepisodemain_diff"]].groupby("local_segment")["endtimeofepisodemain_diff"].median().pow(0.5).to_frame().rename(columns={"endtimeofepisodemain_diff": "rmssdmedianendtimeofepisodemain"})], axis=1) + if "rmssdmedianmidpointofepisodemain" in intraday_features_to_compute: + sleep_intraday_features = pd.concat([sleep_intraday_features, daily_features[["local_segment","midpointofepisodemain_diff"]].groupby("local_segment")["midpointofepisodemain_diff"].median().pow(0.5).to_frame().rename(columns={"midpointofepisodemain_diff": "rmssdmedianmidpointofepisodemain"})], axis=1) return sleep_intraday_features @@ -157,16 +159,16 @@ def MSSDFeatures(daily_features, intraday_features_to_compute, sleep_intraday_fe def price_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs): - daily_start_time = 
provider["GROUP_EPISODES_WITHIN"]["START_TIME"] - daily_end_time = daily_start_time + provider["GROUP_EPISODES_WITHIN"]["LENGTH"] + last_night_end = provider["LAST_NIGHT_END"] sleep_intraday_data = pd.read_csv(sensor_data_files["sensor_data"]) requested_intraday_features = provider["FEATURES"] + levels_include_all_groups = provider["SLEEP_LEVELS"]["INCLUDE_ALL_GROUPS"] requested_sleep_levels = provider["SLEEP_LEVELS"] requested_day_types = provider["DAY_TYPES"] # Name of the features this function can compute - base_intraday_features = ["avgduration", "avgratioduration", "avgstarttimeofepisodemain", "avgendtimeofepisodemain", "avgmidpointofepisodemain", "stdstarttimeofepisodemain", "stdendtimeofepisodemain", "stdmidpointofepisodemain", "socialjetlag", "meanssdstarttimeofepisodemain", "meanssdendtimeofepisodemain", "meanssdmidpointofepisodemain", "medianssdstarttimeofepisodemain", "medianssdendtimeofepisodemain", "medianssdmidpointofepisodemain"] + base_intraday_features = ["avgduration", "avgratioduration", "avgstarttimeofepisodemain", "avgendtimeofepisodemain", "avgmidpointofepisodemain", "stdstarttimeofepisodemain", "stdendtimeofepisodemain", "stdmidpointofepisodemain", "socialjetlag", "rmssdmeanstarttimeofepisodemain", "rmssdmeanendtimeofepisodemain", "rmssdmeanmidpointofepisodemain", "rmssdmedianstarttimeofepisodemain", "rmssdmedianendtimeofepisodemain", "rmssdmedianmidpointofepisodemain"] base_sleep_levels = {"CLASSIC": ["awake", "restless", "asleep"], "STAGES": ["wake", "deep", "light", "rem"], "UNIFIED": ["awake", "asleep"]} @@ -178,7 +180,7 @@ def price_features(sensor_data_files, time_segment, provider, filter_data_by_seg day_types_to_compute = list(set(requested_day_types) & set(base_day_types)) # Full names - features_fullnames = featuresFullNames(intraday_features_to_compute, sleep_levels_to_compute, day_types_to_compute) + features_fullnames = featuresFullNames(intraday_features_to_compute, sleep_levels_to_compute, day_types_to_compute, 
levels_include_all_groups) sleep_intraday_features = pd.DataFrame(columns=features_fullnames) # Filter by segemnts and chunk episodes @@ -206,26 +208,18 @@ def price_features(sensor_data_files, time_segment, provider, filter_data_by_seg main_sleep_episodes["end_minutes"] = main_sleep_episodes["start_minutes"] + main_sleep_episodes["durationinbed"] # Extract fake date """ The rule used for fake date extraction - set DS = daily_start_time, DE = daily_end_time - set start = start_minutes, end = end_minutes - - if (DS <= start < DE) or (DS < end <= DE) or (start <= DS and end >= DE): + if start_minutes >= last_night_end assign today - elif if end <= DS: + else: assign yesterday - else: (same as start >=DE) - assign tomorrow """ - main_sleep_episodes["fake_date_delta"] = main_sleep_episodes[["start_minutes", "end_minutes"]].apply(lambda row: 0 if ((row["start_minutes"] >= daily_start_time and row["start_minutes"] < daily_end_time) or (row["end_minutes"] > daily_start_time and row["end_minutes"] <= daily_end_time) or (row["start_minutes"] <= daily_start_time and row["end_minutes"] >= daily_end_time)) else -1 if (row["end_minutes"] <= daily_start_time) else 1, axis=1) + main_sleep_episodes["fake_date_delta"] = main_sleep_episodes[["start_minutes"]].apply(lambda row: 0 if row["start_minutes"] >= last_night_end else -1, axis=1) main_sleep_episodes["fake_date"] = (main_sleep_episodes["local_start_date_time"] + pd.to_timedelta(main_sleep_episodes["fake_date_delta"], unit="d")).dt.date # Update "start_minutes" column based on START_TIME main_sleep_episodes["start_minutes"] = main_sleep_episodes[["start_minutes", "fake_date_delta"]].apply(lambda row: row["start_minutes"] - 24 * 60 * row["fake_date_delta"], axis=1) main_sleep_episodes["end_minutes"] = main_sleep_episodes["start_minutes"] + main_sleep_episodes["durationinbed"] - # We keep a sleep episode that intersects or contains the period between [START_TIME, START_TIME + LENGTH], aka [daily_start_time, daily_end_time]. 
- main_sleep_episodes = main_sleep_episodes.query("(start_minutes >= @daily_start_time and start_minutes < @daily_end_time) or (end_minutes > @daily_start_time and end_minutes <= @daily_end_time) or (start_minutes <= @daily_start_time and end_minutes >= @daily_end_time)") - # Sort main sleep episodes based on fake_date and start_minutes main_sleep_episodes = main_sleep_episodes.sort_values(["fake_date", "start_minutes"]) # Extract daily features @@ -233,10 +227,10 @@ def price_features(sensor_data_files, time_segment, provider, filter_data_by_seg # Extract features per segment based on daily features for day_type in day_types_to_compute: - sleep_intraday_features = statsOfDailyFeatures(daily_features, day_type, sleep_levels_to_compute, intraday_features_to_compute, sleep_intraday_features) + sleep_intraday_features = statsOfDailyFeatures(daily_features, day_type, sleep_levels_to_compute, intraday_features_to_compute, sleep_intraday_features, levels_include_all_groups) if "socialjetlag" in intraday_features_to_compute: sleep_intraday_features = socialJetLagFeature(daily_features, sleep_intraday_features) - sleep_intraday_features = MSSDFeatures(daily_features, intraday_features_to_compute, sleep_intraday_features) + sleep_intraday_features = RMSSDFeatures(daily_features, intraday_features_to_compute, sleep_intraday_features) sleep_intraday_features.index.name = "local_segment" sleep_intraday_features.reset_index(inplace=True) diff --git a/src/features/fitbit_sleep_intraday/rapids/main.py b/src/features/fitbit_sleep_intraday/rapids/main.py index f419cc3d..a7811f8c 100644 --- a/src/features/fitbit_sleep_intraday/rapids/main.py +++ b/src/features/fitbit_sleep_intraday/rapids/main.py @@ -2,7 +2,7 @@ import pandas as pd from datetime import datetime import itertools -def featuresFullNames(intraday_features_to_compute, sleep_levels_to_compute, sleep_types_to_compute, consider_all): +def featuresFullNames(intraday_features_to_compute, sleep_levels_to_compute, 
sleep_types_to_compute, levels_include_all_groups): features_fullname = ["local_segment"] @@ -11,8 +11,8 @@ def featuresFullNames(intraday_features_to_compute, sleep_levels_to_compute, sle for sleep_level in sleep_levels_to_compute[sleep_level_group]: sleep_level_with_group.append(sleep_level + sleep_level_group.lower()) - if consider_all: - features_fullname.extend([x[0] + x[1] + x[2] for x in itertools.product(intraday_features_to_compute["LEVELS_AND_TYPES"], sleep_level_with_group + ["all"], sleep_types_to_compute + ["all"])]) + if levels_include_all_groups: + features_fullname.extend([x[0] + x[1] + x[2] for x in itertools.product(intraday_features_to_compute["LEVELS_AND_TYPES"], sleep_level_with_group + ["all"], sleep_types_to_compute)]) else: features_fullname.extend([x[0] + x[1] + x[2] for x in itertools.product(intraday_features_to_compute["LEVELS_AND_TYPES"], sleep_level_with_group, sleep_types_to_compute)]) if "ACROSS_LEVELS" in intraday_features_to_compute["RATIOS_SCOPE"]: @@ -20,9 +20,9 @@ def featuresFullNames(intraday_features_to_compute, sleep_levels_to_compute, sle if "ACROSS_TYPES" in intraday_features_to_compute["RATIOS_SCOPE"] and "main" in sleep_types_to_compute: features_fullname.extend(["ratio" + x + "main" for x in intraday_features_to_compute["RATIOS_TYPE"]]) if "WITHIN_LEVELS" in intraday_features_to_compute["RATIOS_SCOPE"]: - features_fullname.extend(["ratio" + x[0] + x[1] + "within" + x[2] for x in itertools.product(intraday_features_to_compute["RATIOS_TYPE"], sleep_types_to_compute, sleep_level_with_group)]) + features_fullname.extend(["ratio" + x[0] + x[1] + "within" + x[2] for x in itertools.product(intraday_features_to_compute["RATIOS_TYPE"], set(sleep_types_to_compute) & set(["main", "nap"]), sleep_level_with_group)]) if "WITHIN_TYPES" in intraday_features_to_compute["RATIOS_SCOPE"]: - features_fullname.extend(["ratio" + x[0] + x[1] + "within" + x[2] for x in itertools.product(intraday_features_to_compute["RATIOS_TYPE"], 
sleep_level_with_group, sleep_types_to_compute)]) + features_fullname.extend(["ratio" + x[0] + x[1] + "within" + x[2] for x in itertools.product(intraday_features_to_compute["RATIOS_TYPE"], sleep_level_with_group, set(sleep_types_to_compute) & set(["main", "nap"]))]) features_fullname.extend(intraday_features_to_compute["ROUTINE"]) return features_fullname @@ -68,26 +68,28 @@ def statsFeatures(sleep_episodes, features, episode_type): def allStatsFeatures(sleep_data, base_sleep_levels, base_sleep_types, features, sleep_intraday_features): # For CLASSIC - for sleep_level, sleep_type in itertools.product(base_sleep_levels["CLASSIC"] + ["all"], base_sleep_types + ["all"]): - sleep_episodes_classic = sleep_data[sleep_data["is_main_sleep"] == (1 if sleep_type == "main" else 0)] if sleep_type != "all" else sleep_data + for sleep_level, sleep_type in itertools.product(base_sleep_levels["CLASSIC"] + ["all"], base_sleep_types): + sleep_episodes_classic = sleep_data[sleep_data["type"] == "classic"] + sleep_episodes_classic = sleep_episodes_classic[sleep_episodes_classic["is_main_sleep"] == (1 if sleep_type == "main" else 0)] if sleep_type != "all" else sleep_episodes_classic sleep_episodes_classic = sleep_episodes_classic[sleep_episodes_classic["level"] == sleep_level] if sleep_level != "all" else sleep_episodes_classic sleep_intraday_features = pd.concat([sleep_intraday_features, statsFeatures(sleep_episodes_classic, features, sleep_level + "classic" + sleep_type)], axis=1) # For STAGES - for sleep_level, sleep_type in itertools.product(base_sleep_levels["STAGES"] + ["all"], base_sleep_types + ["all"]): - sleep_episodes_stages = sleep_data[sleep_data["is_main_sleep"] == (1 if sleep_type == "main" else 0)] if sleep_type != "all" else sleep_data + for sleep_level, sleep_type in itertools.product(base_sleep_levels["STAGES"] + ["all"], base_sleep_types): + sleep_episodes_stages = sleep_data[sleep_data["type"] == "stages"] + sleep_episodes_stages = 
sleep_episodes_stages[sleep_episodes_stages["is_main_sleep"] == (1 if sleep_type == "main" else 0)] if sleep_type != "all" else sleep_episodes_stages sleep_episodes_stages = sleep_episodes_stages[sleep_episodes_stages["level"] == sleep_level] if sleep_level != "all" else sleep_episodes_stages sleep_intraday_features = pd.concat([sleep_intraday_features, statsFeatures(sleep_episodes_stages, features, sleep_level + "stages" + sleep_type)], axis=1) # For UNIFIED - for sleep_level, sleep_type in itertools.product(base_sleep_levels["UNIFIED"] + ["all"], base_sleep_types + ["all"]): + for sleep_level, sleep_type in itertools.product(base_sleep_levels["UNIFIED"] + ["all"], base_sleep_types): sleep_episodes_unified = sleep_data[sleep_data["is_main_sleep"] == (1 if sleep_type == "main" else 0)] if sleep_type != "all" else sleep_data sleep_episodes_unified = sleep_episodes_unified[sleep_episodes_unified["unified_level"] == (0 if sleep_level == "awake" else 1)] if sleep_level != "all" else sleep_episodes_unified sleep_episodes_unified = mergeSleepEpisodes(sleep_episodes_unified, ["local_segment", "unified_level_episode_id"]) sleep_intraday_features = pd.concat([sleep_intraday_features, statsFeatures(sleep_episodes_unified, features, sleep_level + "unified" + sleep_type)], axis=1) # Ignore the levels (e.g. 
countepisode[all][main]) - for sleep_type in base_sleep_types + ["all"]: + for sleep_type in base_sleep_types: sleep_episodes_none = sleep_data[sleep_data["is_main_sleep"] == (1 if sleep_type == "main" else 0)] if sleep_type != "all" else sleep_data sleep_episodes_none = mergeSleepEpisodes(sleep_episodes_none, ["local_segment", "type_episode_id"]) sleep_intraday_features = pd.concat([sleep_intraday_features, statsFeatures(sleep_episodes_none, features, "all" + sleep_type)], axis=1) @@ -151,6 +153,11 @@ def ratiosFeatures(sleep_intraday_features, ratios_types, ratios_scopes, sleep_l # 7) ratios_type: "duration", sleep_levels_combined: ("unified", "asleep"), sleep_type: "main" # 8) ratios_type: "duration", sleep_levels_combined: ("unified", "asleep"), sleep_type: "nap" for ratios_type, sleep_levels_combined, sleep_type in itertools.product(ratios_types, sleep_level_with_group, sleep_types): + + # "all" sleep type will not be cosidered for any ratios features since it will be 1 all the time + if sleep_type == "all": + continue + sleep_level_group, sleep_level = sleep_levels_combined[0], sleep_levels_combined[1] agg_func = "countepisode" if ratios_type == "count" else "sumduration" @@ -167,36 +174,36 @@ def ratiosFeatures(sleep_intraday_features, ratios_types, ratios_scopes, sleep_l return sleep_intraday_features -def singleSleepTypeRoutineFeatures(sleep_intraday_data, routine, reference_time, sleep_type, sleep_intraday_features): +def singleSleepTypeRoutineFeatures(sleep_intraday_data, routine, routine_reference_time, sleep_type, sleep_intraday_features): sleep_intraday_data = sleep_intraday_data[sleep_intraday_data["is_main_sleep"] == (1 if sleep_type == "mainsleep" else 0)] if "starttimefirst" + sleep_type in routine: grouped_first = sleep_intraday_data.groupby(["local_segment"]).first() - if reference_time == "MIDNIGHT": + if routine_reference_time == "MIDNIGHT": sleep_intraday_features["starttimefirst" + sleep_type] = 
grouped_first["local_start_date_time"].apply(lambda x: x.hour * 60 + x.minute + x.second / 60) - elif reference_time == "START_OF_THE_SEGMENT": + elif routine_reference_time == "START_OF_THE_SEGMENT": sleep_intraday_features["starttimefirst" + sleep_type] = (grouped_first["start_timestamp"] - grouped_first["segment_start_timestamp"]) / (60 * 1000) else: - raise ValueError("Please check FITBIT_SLEEP_INTRADAY section of config.yaml: REFERENCE_TIME can only be MIDNIGHT or START_OF_THE_SEGMENT.") + raise ValueError("Please check FITBIT_SLEEP_INTRADAY section of config.yaml: ROUTINE_REFERENCE_TIME can only be MIDNIGHT or START_OF_THE_SEGMENT.") if "endtimelast" + sleep_type in routine: grouped_last = sleep_intraday_data.groupby(["local_segment"]).last() - if reference_time == "MIDNIGHT": + if routine_reference_time == "MIDNIGHT": sleep_intraday_features["endtimelast" + sleep_type] = grouped_last["local_end_date_time"].apply(lambda x: x.hour * 60 + x.minute + x.second / 60) - elif reference_time == "START_OF_THE_SEGMENT": + elif routine_reference_time == "START_OF_THE_SEGMENT": sleep_intraday_features["endtimelast" + sleep_type] = (grouped_last["end_timestamp"] - grouped_last["segment_start_timestamp"]) / (60 * 1000) else: - raise ValueError("Please check FITBIT_SLEEP_INTRADAY section of config.yaml: REFERENCE_TIME can only be MIDNIGHT or START_OF_THE_SEGMENT.") + raise ValueError("Please check FITBIT_SLEEP_INTRADAY section of config.yaml: ROUTINE_REFERENCE_TIME can only be MIDNIGHT or START_OF_THE_SEGMENT.") return sleep_intraday_features -def routineFeatures(sleep_intraday_data, routine, reference_time, sleep_type, sleep_intraday_features): +def routineFeatures(sleep_intraday_data, routine, routine_reference_time, sleep_type, sleep_intraday_features): if "starttimefirstmainsleep" in routine or "endtimelastmainsleep" in routine: - sleep_intraday_features = singleSleepTypeRoutineFeatures(sleep_intraday_data, routine, reference_time, "mainsleep", sleep_intraday_features) 
+ sleep_intraday_features = singleSleepTypeRoutineFeatures(sleep_intraday_data, routine, routine_reference_time, "mainsleep", sleep_intraday_features) if "starttimefirstnap" in routine or "endtimelastnap" in routine: - sleep_intraday_features = singleSleepTypeRoutineFeatures(sleep_intraday_data, routine, reference_time, "nap", sleep_intraday_features) + sleep_intraday_features = singleSleepTypeRoutineFeatures(sleep_intraday_data, routine, routine_reference_time, "nap", sleep_intraday_features) return sleep_intraday_features @@ -205,11 +212,11 @@ def rapids_features(sensor_data_files, time_segment, provider, filter_data_by_se sleep_intraday_data = pd.read_csv(sensor_data_files["sensor_data"]) - consider_all = provider["FEATURES"]["LEVELS_AND_TYPES_COMBINING_ALL"] - include_sleep_later_than = provider["INCLUDE_SLEEP_LATER_THAN"] - reference_time = provider["REFERENCE_TIME"] + last_night_end = provider["LAST_NIGHT_END"] + routine_reference_time = provider["ROUTINE_REFERENCE_TIME"] requested_intraday_features = provider["FEATURES"] + levels_include_all_groups = provider["SLEEP_LEVELS"]["INCLUDE_ALL_GROUPS"] requested_sleep_levels = provider["SLEEP_LEVELS"] requested_sleep_types = provider["SLEEP_TYPES"] @@ -221,7 +228,7 @@ def rapids_features(sensor_data_files, time_segment, provider, filter_data_by_se base_sleep_levels = {"CLASSIC": ["awake", "restless", "asleep"], "STAGES": ["wake", "deep", "light", "rem"], "UNIFIED": ["awake", "asleep"]} - base_sleep_types = ["main", "nap"] + base_sleep_types = ["main", "nap", "all"] # The subset of requested features this function can compute intraday_features_to_compute = {key: list(set(requested_intraday_features[key]) & set(base_intraday_features[key])) for key in requested_intraday_features if key in base_intraday_features} @@ -229,13 +236,13 @@ def rapids_features(sensor_data_files, time_segment, provider, filter_data_by_se sleep_types_to_compute = list(set(requested_sleep_types) & set(base_sleep_types)) # Full names - 
features_fullnames = featuresFullNames(intraday_features_to_compute, sleep_levels_to_compute, sleep_types_to_compute, consider_all) + features_fullnames = featuresFullNames(intraday_features_to_compute, sleep_levels_to_compute, sleep_types_to_compute, levels_include_all_groups) sleep_intraday_features = pd.DataFrame(columns=features_fullnames) - # Include sleep later than + # Any 1-minute sleep chuncks with a local time before LAST_NIGHT_END will be discarded. start_minutes = sleep_intraday_data.groupby("start_timestamp").first()["local_time"].apply(lambda x: int(x.split(":")[0]) * 60 + int(x.split(":")[1]) + int(x.split(":")[2]) / 60).to_frame().rename(columns={"local_time": "start_minutes"}).reset_index() sleep_intraday_data = sleep_intraday_data.merge(start_minutes, on="start_timestamp", how="left") - sleep_intraday_data = sleep_intraday_data[sleep_intraday_data["start_minutes"] >= include_sleep_later_than] + sleep_intraday_data = sleep_intraday_data[sleep_intraday_data["start_minutes"] >= last_night_end] del sleep_intraday_data["start_minutes"] sleep_intraday_data = filter_data_by_segment(sleep_intraday_data, time_segment) @@ -254,7 +261,7 @@ def rapids_features(sensor_data_files, time_segment, provider, filter_data_by_se sleep_intraday_features = ratiosFeatures(sleep_intraday_features, intraday_features_to_compute["RATIOS_TYPE"], intraday_features_to_compute["RATIOS_SCOPE"], sleep_levels_to_compute, sleep_types_to_compute) # ROUTINE: only compute requested features - sleep_intraday_features = routineFeatures(sleep_intraday_data, intraday_features_to_compute["ROUTINE"], reference_time, sleep_types_to_compute, sleep_intraday_features) + sleep_intraday_features = routineFeatures(sleep_intraday_data, intraday_features_to_compute["ROUTINE"], routine_reference_time, sleep_types_to_compute, sleep_intraday_features) # Reset index and discard features which are not requested by user sleep_intraday_features.index.name = "local_segment" diff --git 
a/src/features/fitbit_sleep_summary/rapids/main.py b/src/features/fitbit_sleep_summary/rapids/main.py index 048baa20..ed98a497 100644 --- a/src/features/fitbit_sleep_summary/rapids/main.py +++ b/src/features/fitbit_sleep_summary/rapids/main.py @@ -43,6 +43,21 @@ def extractSleepFeaturesFromSummaryData(sleep_summary_data, summary_features, sl if "countepisode" in summary_features: sleep_summary_features = sleep_summary_features.join(features_count[["timestamp"]], how="outer").rename(columns={"timestamp": "countepisode" + sleep_type}) + + features_first = sleep_summary_data[["local_segment", "minutes_start_episode", "minutes_end_episode"]].groupby(["local_segment"]).first() + + if "firstwaketime" in summary_features: + sleep_summary_features = sleep_summary_features.join(features_first[["minutes_end_episode"]].shift(), how="outer").rename(columns={"minutes_end_episode": "firstwaketime" + sleep_type}) + if "firstbedtime" in summary_features: + sleep_summary_features = sleep_summary_features.join(features_first[["minutes_start_episode"]], how="outer").rename(columns={"minutes_start_episode": "firstbedtime" + sleep_type}) + + features_last = sleep_summary_data[["local_segment", "minutes_start_episode", "minutes_end_episode"]].groupby(["local_segment"]).last() + + if "lastwaketime" in summary_features: + sleep_summary_features = sleep_summary_features.join(features_last[["minutes_end_episode"]].shift(), how="outer").rename(columns={"minutes_end_episode": "lastwaketime" + sleep_type}) + if "lastbedtime" in summary_features: + sleep_summary_features = sleep_summary_features.join(features_last[["minutes_start_episode"]], how="outer").rename(columns={"minutes_start_episode": "lastbedtime" + sleep_type}) + return sleep_summary_features @@ -55,7 +70,7 @@ def rapids_features(sensor_data_files, time_segment, provider, filter_data_by_se requested_sleep_types = provider["SLEEP_TYPES"] # name of the features this function can compute - base_summary_features = ["countepisode", 
"avgefficiency", "sumdurationafterwakeup", "sumdurationasleep", "sumdurationawake", "sumdurationtofallasleep", "sumdurationinbed", "avgdurationafterwakeup", "avgdurationasleep", "avgdurationawake", "avgdurationtofallasleep", "avgdurationinbed"] + base_summary_features = ["firstwaketime", "lastwaketime", "firstbedtime", "lastbedtime", "countepisode", "avgefficiency", "sumdurationafterwakeup", "sumdurationasleep", "sumdurationawake", "sumdurationtofallasleep", "sumdurationinbed", "avgdurationafterwakeup", "avgdurationasleep", "avgdurationawake", "avgdurationtofallasleep", "avgdurationinbed"] base_sleep_types = ["main", "nap", "all"] # the subset of requested features this function can compute summary_features_to_compute = list(set(requested_summary_features) & set(base_summary_features)) @@ -63,13 +78,15 @@ def rapids_features(sensor_data_files, time_segment, provider, filter_data_by_se # full names features_fullnames_to_compute = ["".join(feature) for feature in itertools.product(summary_features_to_compute, sleep_types_to_compute)] - colnames_can_be_zero = [col for col in features_fullnames_to_compute if "avgefficiency" not in col] + colnames_can_be_zero = ["".join(feature) for feature in itertools.product(set(summary_features_to_compute) - set(["firstwaketime", "lastwaketime", "firstbedtime", "lastbedtime", "avgefficiency"]), sleep_types_to_compute)] # extract features from summary data sleep_summary_features = pd.DataFrame(columns=["local_segment"] + features_fullnames_to_compute) if not sleep_summary_data.empty: sleep_summary_data = filter_data_by_segment(sleep_summary_data, time_segment) + notna_segments = sleep_summary_data[sleep_summary_data["type"].notna()]["local_segment"].unique() + if not sleep_summary_data.empty: # only keep the segments start at 00:00:00 and end at 23:59:59 datetime_start_regex = "[0-9]{4}[\\-|\\/][0-9]{2}[\\-|\\/][0-9]{2} 00:00:00" @@ -78,13 +95,19 @@ def rapids_features(sensor_data_files, time_segment, provider, filter_data_by_se 
segment_regex = "{}#{},{}".format(time_segment, datetime_start_regex, datetime_end_regex) sleep_summary_data = sleep_summary_data[sleep_summary_data["local_segment"].str.match(segment_regex)] + # calculate number of minutes after segment's start date time + dt_cols = ["local_start_date_time", "local_end_date_time", "local_date_time"] + sleep_summary_data[dt_cols] = sleep_summary_data[dt_cols].apply(pd.to_datetime) + sleep_summary_data["minutes_start_episode"] = (sleep_summary_data["local_start_date_time"] - sleep_summary_data["local_date_time"]) / pd.Timedelta(minutes=1) + sleep_summary_data["minutes_end_episode"] = (sleep_summary_data["local_end_date_time"] - (sleep_summary_data["local_date_time"] + pd.Timedelta(days=1))) / pd.Timedelta(minutes=1) + if not sleep_summary_data.empty: sleep_summary_features = pd.DataFrame() for sleep_type in sleep_types_to_compute: sleep_summary_features = extractSleepFeaturesFromSummaryData(sleep_summary_data, summary_features_to_compute, sleep_type, sleep_summary_features) - sleep_summary_features[colnames_can_be_zero] = sleep_summary_features[colnames_can_be_zero].fillna(0) + sleep_summary_features.loc[notna_segments, colnames_can_be_zero] = sleep_summary_features.loc[notna_segments, colnames_can_be_zero].fillna(0) sleep_summary_features = sleep_summary_features.reset_index() diff --git a/tools/config.schema.yaml b/tools/config.schema.yaml index 317b738d..9e1dabd0 100644 --- a/tools/config.schema.yaml +++ b/tools/config.schema.yaml @@ -827,22 +827,44 @@ properties: type: string fitbitjson_mysql: type: object - required: [DATABASE_GROUP, SLEEP_SUMMARY_EPISODE_DAY_ANCHOR] + required: [DATABASE_GROUP, SLEEP_SUMMARY_LAST_NIGHT_END] properties: DATABASE_GROUP: type: string - SLEEP_SUMMARY_EPISODE_DAY_ANCHOR: - type: string - enum: ["start", "end"] + SLEEP_SUMMARY_LAST_NIGHT_END: + type: number + minimum: 0 + maximum: 1439 fitbitparsed_mysql: type: object - required: [DATABASE_GROUP, SLEEP_SUMMARY_EPISODE_DAY_ANCHOR] + required: 
[DATABASE_GROUP, SLEEP_SUMMARY_LAST_NIGHT_END] properties: DATABASE_GROUP: type: string - SLEEP_SUMMARY_EPISODE_DAY_ANCHOR: + SLEEP_SUMMARY_LAST_NIGHT_END: + type: number + minimum: 0 + maximum: 1439 + fitbitjson_csv: + type: object + required: [FOLDER, SLEEP_SUMMARY_LAST_NIGHT_END] + properties: + FOLDER: type: string - enum: ["start", "end"] + SLEEP_SUMMARY_LAST_NIGHT_END: + type: number + minimum: 0 + maximum: 1439 + fitbitparsed_csv: + type: object + required: [FOLDER, SLEEP_SUMMARY_LAST_NIGHT_END] + properties: + FOLDER: + type: string + SLEEP_SUMMARY_LAST_NIGHT_END: + type: number + minimum: 0 + maximum: 1439 FITBIT_DATA_YIELD: type: object @@ -926,7 +948,7 @@ properties: uniqueItems: True items: type: string - enum: ["countepisode", "avgefficiency", "sumdurationafterwakeup", "sumdurationasleep", "sumdurationawake", "sumdurationtofallasleep", "sumdurationinbed", "avgdurationafterwakeup", "avgdurationasleep", "avgdurationawake", "avgdurationtofallasleep", "avgdurationinbed"] + enum: ["firstwaketime", "lastwaketime", "firstbedtime", "lastbedtime", "countepisode", "avgefficiency", "sumdurationafterwakeup", "sumdurationasleep", "sumdurationawake", "sumdurationtofallasleep", "sumdurationinbed", "avgdurationafterwakeup", "avgdurationasleep", "avgdurationawake", "avgdurationtofallasleep", "avgdurationinbed"] SLEEP_TYPES: type: array uniqueItems: True @@ -951,10 +973,8 @@ properties: - properties: FEATURES: type: object - required: [LEVELS_AND_TYPES_COMBINING_ALL, LEVELS_AND_TYPES, RATIOS_TYPE, RATIOS_SCOPE, ROUTINE] + required: [LEVELS_AND_TYPES, RATIOS_TYPE, RATIOS_SCOPE, ROUTINE] properties: - LEVELS_AND_TYPES_COMBINING_ALL: - type: boolean LEVELS_AND_TYPES: type: array uniqueItems: True @@ -981,8 +1001,10 @@ properties: enum: [starttimefirstmainsleep, endtimelastmainsleep, starttimefirstnap, endtimelastnap] SLEEP_LEVELS: type: object - required: [CLASSIC, STAGES, UNIFIED] + required: [INCLUDE_ALL_GROUPS, CLASSIC, STAGES, UNIFIED] properties: + INCLUDE_ALL_GROUPS: 
+ type: boolean CLASSIC: type: array uniqueItems: True @@ -1006,12 +1028,12 @@ properties: uniqueItems: True items: type: string - enum: [main, nap] - INCLUDE_SLEEP_LATER_THAN: + enum: [main, nap, all] + LAST_NIGHT_END: type: number minimum: 0 maximum: 1439 - REFERENCE_TIME: + ROUTINE_REFERENCE_TIME: type: string enum: [MIDNIGHT, START_OF_THE_SEGMENT] PRICE: @@ -1022,11 +1044,13 @@ properties: uniqueItems: True items: type: string - enum: [avgduration, avgratioduration, avgstarttimeofepisodemain, avgendtimeofepisodemain, avgmidpointofepisodemain, "stdstarttimeofepisodemain", "stdendtimeofepisodemain", "stdmidpointofepisodemain", socialjetlag, meanssdstarttimeofepisodemain, meanssdendtimeofepisodemain, meanssdmidpointofepisodemain, medianssdstarttimeofepisodemain, medianssdendtimeofepisodemain, medianssdmidpointofepisodemain] + enum: [avgduration, avgratioduration, avgstarttimeofepisodemain, avgendtimeofepisodemain, avgmidpointofepisodemain, stdstarttimeofepisodemain, stdendtimeofepisodemain, stdmidpointofepisodemain, socialjetlag, rmssdmeanstarttimeofepisodemain, rmssdmeanendtimeofepisodemain, rmssdmeanmidpointofepisodemain, rmssdmedianstarttimeofepisodemain, rmssdmedianendtimeofepisodemain, rmssdmedianmidpointofepisodemain] SLEEP_LEVELS: type: object - required: [CLASSIC, STAGES, UNIFIED] + required: [INCLUDE_ALL_GROUPS, CLASSIC, STAGES, UNIFIED] properties: + INCLUDE_ALL_GROUPS: + type: boolean CLASSIC: type: array uniqueItems: True @@ -1051,18 +1075,10 @@ properties: items: type: string enum: [WEEKEND, WEEK, ALL] - GROUP_EPISODES_WITHIN: - type: object - required: [START_TIME, LENGTH] - properties: - START_TIME: - type: number - minimum: 0 - maximum: 1439 - LENGTH: - type: number - minimum: 0 - maximum: 1440 + LAST_NIGHT_END: + type: number + minimum: 0 + maximum: 1439 additionalProperties: $ref: "#/definitions/PROVIDER"