Update params & docs of sleep features

pull/134/head
Meng Li 2021-04-09 14:59:53 -04:00
parent 29cc3f00e9
commit 66d9a9d640
14 changed files with 249 additions and 225 deletions

View File

@ -321,19 +321,19 @@ FITBIT_DATA_STREAMS:
# AVAILABLE:
fitbitjson_mysql:
DATABASE_GROUP: MY_GROUP
SLEEP_SUMMARY_EPISODE_DAY_ANCHOR: end # summary sleep episodes are considered as events based on either the start timestamp or end timestamp.
SLEEP_SUMMARY_LAST_NIGHT_END: 660 # a number ranging from 0 (midnight) to 1439 (23:59) that denotes the number of minutes after midnight. By default, 660 (11:00).
fitbitparsed_mysql:
DATABASE_GROUP: MY_GROUP
SLEEP_SUMMARY_EPISODE_DAY_ANCHOR: end # summary sleep episodes are considered as events based on either the start timestamp or end timestamp.
SLEEP_SUMMARY_LAST_NIGHT_END: 660 # a number ranging from 0 (midnight) to 1439 (23:59) that denotes the number of minutes after midnight. By default, 660 (11:00).
fitbitjson_csv:
FOLDER: data/external/fitbit_csv
SLEEP_SUMMARY_EPISODE_DAY_ANCHOR: end # summary sleep episodes are considered as events based on either the start timestamp or end timestamp.
SLEEP_SUMMARY_LAST_NIGHT_END: 660 # a number ranging from 0 (midnight) to 1439 (23:59) that denotes the number of minutes after midnight. By default, 660 (11:00).
fitbitparsed_csv:
FOLDER: data/external/fitbit_csv
SLEEP_SUMMARY_EPISODE_DAY_ANCHOR: end # summary sleep episodes are considered as events based on either the start timestamp or end timestamp.
SLEEP_SUMMARY_LAST_NIGHT_END: 660 # a number ranging from 0 (midnight) to 1439 (23:59) that denotes the number of minutes after midnight. By default, 660 (11:00).
# Sensors ------
@ -386,7 +386,7 @@ FITBIT_SLEEP_SUMMARY:
PROVIDERS:
RAPIDS:
COMPUTE: False
FEATURES: ["countepisode", "avgefficiency", "sumdurationafterwakeup", "sumdurationasleep", "sumdurationawake", "sumdurationtofallasleep", "sumdurationinbed", "avgdurationafterwakeup", "avgdurationasleep", "avgdurationawake", "avgdurationtofallasleep", "avgdurationinbed"]
FEATURES: ["firstwaketime", "lastwaketime", "firstbedtime", "lastbedtime", "countepisode", "avgefficiency", "sumdurationafterwakeup", "sumdurationasleep", "sumdurationawake", "sumdurationtofallasleep", "sumdurationinbed", "avgdurationafterwakeup", "avgdurationasleep", "avgdurationawake", "avgdurationtofallasleep", "avgdurationinbed"]
SLEEP_TYPES: ["main", "nap", "all"]
SRC_SCRIPT: src/features/fitbit_sleep_summary/rapids/main.py
@ -397,31 +397,30 @@ FITBIT_SLEEP_INTRADAY:
RAPIDS:
COMPUTE: False
FEATURES:
LEVELS_AND_TYPES_COMBINING_ALL: True
LEVELS_AND_TYPES: [countepisode, sumduration, maxduration, minduration, avgduration, medianduration, stdduration]
RATIOS_TYPE: [count, duration]
RATIOS_SCOPE: [ACROSS_LEVELS, ACROSS_TYPES, WITHIN_LEVELS, WITHIN_TYPES]
ROUTINE: [starttimefirstmainsleep, endtimelastmainsleep, starttimefirstnap, endtimelastnap]
SLEEP_LEVELS:
INCLUDE_ALL_GROUPS: True
CLASSIC: [awake, restless, asleep]
STAGES: [wake, deep, light, rem]
UNIFIED: [awake, asleep]
SLEEP_TYPES: [main, nap]
INCLUDE_SLEEP_LATER_THAN: 0 # a number ranged from 0 (midnight) to 1439 (23:59)
REFERENCE_TIME: MIDNIGHT # chosen from "MIDNIGHT" and "START_OF_THE_SEGMENT"
SLEEP_TYPES: [main, nap, all]
LAST_NIGHT_END: 0 # a number ranging from 0 (midnight) to 1439 (23:59) that denotes the number of minutes after midnight
ROUTINE_REFERENCE_TIME: MIDNIGHT # chosen from "MIDNIGHT" and "START_OF_THE_SEGMENT"
SRC_SCRIPT: src/features/fitbit_sleep_intraday/rapids/main.py
PRICE:
COMPUTE: False
FEATURES: [avgduration, avgratioduration, avgstarttimeofepisodemain, avgendtimeofepisodemain, avgmidpointofepisodemain, "stdstarttimeofepisodemain", "stdendtimeofepisodemain", "stdmidpointofepisodemain", socialjetlag, meanssdstarttimeofepisodemain, meanssdendtimeofepisodemain, meanssdmidpointofepisodemain, medianssdstarttimeofepisodemain, medianssdendtimeofepisodemain, medianssdmidpointofepisodemain]
FEATURES: [avgduration, avgratioduration, avgstarttimeofepisodemain, avgendtimeofepisodemain, avgmidpointofepisodemain, stdstarttimeofepisodemain, stdendtimeofepisodemain, stdmidpointofepisodemain, socialjetlag, rmssdmeanstarttimeofepisodemain, rmssdmeanendtimeofepisodemain, rmssdmeanmidpointofepisodemain, rmssdmedianstarttimeofepisodemain, rmssdmedianendtimeofepisodemain, rmssdmedianmidpointofepisodemain]
SLEEP_LEVELS:
INCLUDE_ALL_GROUPS: True
CLASSIC: [awake, restless, asleep]
STAGES: [wake, deep, light, rem]
UNIFIED: [awake, asleep]
DAY_TYPES: [WEEKEND, WEEK, ALL]
GROUP_EPISODES_WITHIN: # by default: today's 6pm to tomorrow's noon
START_TIME: 1080 # number of minutes after the midnight (18:00) 18*60
LENGTH: 1080 # in minutes (18 hours) 18*60
LAST_NIGHT_END: 660 # number of minutes after the midnight (11:00) 11*60
SRC_SCRIPT: src/features/fitbit_sleep_intraday/price/main.py
# See https://www.rapids.science/latest/features/fitbit-steps-summary/

View File

@ -1,5 +1,9 @@
# Change Log
## v1.2.0
- Sleep summary and intraday features are more consistent.
- Add wake and bedtime features for sleep summary data.
- Fix bugs with sleep PRICE features.
## v1.1.1
- Fix length of periodic segments on days with DLS
- Fix crash when scraping data for an app that does not exist

View File

@ -8,6 +8,10 @@ Sensor parameters description for `[FITBIT_SLEEP_INTRADAY]`:
## RAPIDS provider
!!! hint "Understanding RAPIDS features"
[This diagram](../../img/sleep_intraday_rapids.png) will help you understand how sleep episodes are chunked and grouped within time segments and `LNE-LNE` intervals for the RAPIDS provider.
!!! info "Available time segments"
- Available for all time segments
@ -29,23 +33,23 @@ Parameters description for `[FITBIT_SLEEP_INTRADAY][PROVIDERS][RAPIDS]`:
|----------------|-----------------------------------------------------------------------------------------------------------------------------------
|`[COMPUTE]` | Set to `True` to extract `FITBIT_SLEEP_INTRADAY` features from the `RAPIDS` provider|
|`[FEATURES]` | Features to be computed from sleep intraday data, see table below |
|`[SLEEP_LEVELS]` | Fitbits sleep API Version 1 only provides `CLASSIC` records. However, Version 1.2 provides 2 types of records: `CLASSIC` and `STAGES`. `STAGES` is only available in devices with a heart rate sensor and even those devices will fail to report it if the battery is low or the device is not tight enough. While `CLASSIC` contains 3 sleep levels (`awake`, `restless`, and `asleep`), `STAGES` contains 4 sleep levels (`wake`, `deep`, `light`, `rem`). To make it consistent, RAPIDS grouped them into 2 `UNIFIED` sleep levels: `awake` (`CLASSIC`: `awake` and `restless`; `STAGES`: `wake`) and `asleep` (`CLASSIC`: `asleep`; `STAGES`: `deep`, `light`, and `rem`).
|`[SLEEP_TYPES]` | Types of sleep to be included in the feature extraction computation. Fitbit provides 2 types of sleep: `main`, `nap`.
|`[INCLUDE_SLEEP_LATER_THAN]`| All resampled sleep rows (bin interval: one minute) that started after this time will be included in the feature computation. It is a number ranging from 0 (midnight) to 1439 (23:59) which denotes the number of minutes after midnight. If a segment is longer than one day, this value is for every day.
|`[REFERENCE_TIME]`| The reference point from which the `[ROUTINE]` features are to be computed. Chosen from `MIDNIGHT` and `START_OF_THE_SEGMENT`, default is `MIDNIGHT`. If you have multiple time segments per day it might be more informative to set this flag to `START_OF_THE_SEGMENT`.
|`[SLEEP_LEVELS]` | Fitbit's sleep API Version 1 only provides `CLASSIC` records. However, Version 1.2 provides 2 types of records: `CLASSIC` and `STAGES`. `STAGES` is only available in devices with a heart rate sensor and even those devices will fail to report it if the battery is low or the device is not tight enough. While `CLASSIC` contains 3 sleep levels (`awake`, `restless`, and `asleep`), `STAGES` contains 4 sleep levels (`wake`, `deep`, `light`, `rem`). To make it consistent, RAPIDS groups them into 2 `UNIFIED` sleep levels: `awake` (`CLASSIC`: `awake` and `restless`; `STAGES`: `wake`) and `asleep` (`CLASSIC`: `asleep`; `STAGES`: `deep`, `light`, and `rem`). In this section, there is a boolean flag named `INCLUDE_ALL_GROUPS` that, if set to `True`, computes `LEVELS_AND_TYPES` features grouping all levels together in a single `all` category.
|`[SLEEP_TYPES]` | Types of sleep to be included in the feature extraction computation. There are three sleep types: `main`, `nap`, and `all`. The `all` type means both main sleep and naps are considered.
|`[LAST_NIGHT_END]`| All resampled sleep rows (bin interval: one minute) that started after this time will be included in the feature computation. It ranges from 0 (midnight) to 1439 (23:59) which denotes the number of minutes after midnight. If a segment is longer than one day, this value is applied every day.
|`[ROUTINE_REFERENCE_TIME]`| The reference point from which the `[ROUTINE]` features are computed. It can be `MIDNIGHT` or `START_OF_THE_SEGMENT`; the default is `MIDNIGHT`. If you have multiple time segments per day it might be more informative to set this flag to `START_OF_THE_SEGMENT`.
Features description for `[FITBIT_SLEEP_INTRADAY][PROVIDERS][RAPIDS][LEVELS_AND_TYPES]`:
|Feature                                           |Units |Description |
|------------------------------- |-------------- |-------------------------------------------------------------|
|countepisode`[LEVEL][TYPE]` |episodes |Number of `[LEVEL][TYPE]`sleep episodes. `[LEVEL]`is one of `[SLEEP_LEVELS]` (e.g. awake-classic or rem-stages) and `[TYPE]` is one of `[SLEEP_TYPES]` (e.g. main). Both `[LEVEL]`and `[TYPE]` can also be `all` when ``LEVELS_AND_TYPES_COMBINING_ALL`` is True, which ignores the levels and groups by sleep types.
|sumduration`[LEVEL][TYPE]` |minutes |Total duration of all `[LEVEL][TYPE]`sleep episodes. `[LEVEL]`is one of `[SLEEP_LEVELS]` (e.g. awake-classic or rem-stages) and `[TYPE]` is one of `[SLEEP_TYPES]` (e.g. main). Both `[LEVEL]` and `[TYPE]`can also be `all` when `LEVELS_AND_TYPES_COMBINING_ALL` is True, which ignores the levels and groups by sleep types.
|maxduration`[LEVEL][TYPE]` |minutes | Longest duration of any `[LEVEL][TYPE]`sleep episode. `[LEVEL]`is one of `[SLEEP_LEVELS]` (e.g. awake-classic or rem-stages) and `[TYPE]` is one of `[SLEEP_TYPES]` (e.g. main). Both `[LEVEL]` and `[TYPE]`can also be `all` when `LEVELS_AND_TYPES_COMBINING_ALL` is True, which ignores the levels and groups by sleep types.
|minduration`[LEVEL][TYPE]` |minutes | Shortest duration of any `[LEVEL][TYPE]`sleep episode. `[LEVEL]`is one of `[SLEEP_LEVELS]` (e.g. awake-classic or rem-stages) and `[TYPE]` is one of `[SLEEP_TYPES]` (e.g. main). Both `[LEVEL]` and `[TYPE]`can also be `all` when `LEVELS_AND_TYPES_COMBINING_ALL` is True, which ignores the levels and groups by sleep types.
|avgduration`[LEVEL][TYPE]` |minutes | Average duration of all `[LEVEL][TYPE]`sleep episodes. `[LEVEL]`is one of `[SLEEP_LEVELS]` (e.g. awake-classic or rem-stages) and `[TYPE]` is one of `[SLEEP_TYPES]` (e.g. main). Both `[LEVEL]` and `[TYPE]`can also be `all` when `LEVELS_AND_TYPES_COMBINING_ALL` is True, which ignores the levels and groups by sleep types.
|medianduration`[LEVEL][TYPE]` |minutes | Median duration of all `[LEVEL][TYPE]`sleep episodes. `[LEVEL]`is one of `[SLEEP_LEVELS]` (e.g. awake-classic or rem-stages) and `[TYPE]` is one of `[SLEEP_TYPES]` (e.g. main). Both `[LEVEL]` and `[TYPE]`can also be `all` when `LEVELS_AND_TYPES_COMBINING_ALL` is True, which ignores the levels and groups by sleep types.
|stdduration`[LEVEL][TYPE]` |minutes | Standard deviation duration of all `[LEVEL][TYPE]`sleep episodes. `[LEVEL]`is one of `[SLEEP_LEVELS]` (e.g. awake-classic or rem-stages) and `[TYPE]` is one of `[SLEEP_TYPES]` (e.g. main). Both `[LEVEL]` and `[TYPE]`can also be `all` when `LEVELS_AND_TYPES_COMBINING_ALL` is True, which ignores the levels and groups by sleep types.
|countepisode`[LEVEL][TYPE]` |episodes |Number of `[LEVEL][TYPE]` sleep episodes. `[LEVEL]` is one of `[SLEEP_LEVELS]` (e.g. awake-classic or rem-stages) and `[TYPE]` is one of `[SLEEP_TYPES]` (e.g. main). `[LEVEL]` can also be `all` when `INCLUDE_ALL_GROUPS` is True, which ignores the levels and groups by sleep types.
|sumduration`[LEVEL][TYPE]` |minutes |Total duration of all `[LEVEL][TYPE]` sleep episodes. `[LEVEL]` is one of `[SLEEP_LEVELS]` (e.g. awake-classic or rem-stages) and `[TYPE]` is one of `[SLEEP_TYPES]` (e.g. main). `[LEVEL]` can also be `all` when `INCLUDE_ALL_GROUPS` is True, which ignores the levels and groups by sleep types.
|maxduration`[LEVEL][TYPE]` |minutes | Longest duration of any `[LEVEL][TYPE]` sleep episode. `[LEVEL]` is one of `[SLEEP_LEVELS]` (e.g. awake-classic or rem-stages) and `[TYPE]` is one of `[SLEEP_TYPES]` (e.g. main). `[LEVEL]` can also be `all` when `INCLUDE_ALL_GROUPS` is True, which ignores the levels and groups by sleep types.
|minduration`[LEVEL][TYPE]` |minutes | Shortest duration of any `[LEVEL][TYPE]` sleep episode. `[LEVEL]` is one of `[SLEEP_LEVELS]` (e.g. awake-classic or rem-stages) and `[TYPE]` is one of `[SLEEP_TYPES]` (e.g. main). `[LEVEL]` can also be `all` when `INCLUDE_ALL_GROUPS` is True, which ignores the levels and groups by sleep types.
|avgduration`[LEVEL][TYPE]` |minutes | Average duration of all `[LEVEL][TYPE]` sleep episodes. `[LEVEL]` is one of `[SLEEP_LEVELS]` (e.g. awake-classic or rem-stages) and `[TYPE]` is one of `[SLEEP_TYPES]` (e.g. main). `[LEVEL]` can also be `all` when `INCLUDE_ALL_GROUPS` is True, which ignores the levels and groups by sleep types.
|medianduration`[LEVEL][TYPE]` |minutes | Median duration of all `[LEVEL][TYPE]` sleep episodes. `[LEVEL]` is one of `[SLEEP_LEVELS]` (e.g. awake-classic or rem-stages) and `[TYPE]` is one of `[SLEEP_TYPES]` (e.g. main). `[LEVEL]` can also be `all` when `INCLUDE_ALL_GROUPS` is True, which ignores the levels and groups by sleep types.
|stdduration`[LEVEL][TYPE]` |minutes | Standard deviation of the duration of all `[LEVEL][TYPE]` sleep episodes. `[LEVEL]` is one of `[SLEEP_LEVELS]` (e.g. awake-classic or rem-stages) and `[TYPE]` is one of `[SLEEP_TYPES]` (e.g. main). `[LEVEL]` can also be `all` when `INCLUDE_ALL_GROUPS` is True, which ignores the levels and groups by sleep types.
Features description for `[FITBIT_SLEEP_INTRADAY][PROVIDERS][RAPIDS]` RATIOS `[ACROSS_LEVELS]`:
@ -84,22 +88,29 @@ Features description for `[FITBIT_SLEEP_INTRADAY][PROVIDERS][RAPIDS][ROUTINE]`:
|Feature |Units |Description |
|--------------------------------- |-------------- |-------------------------------------------------------------|
|starttimefirstmainsleep |minutes |Start time (in minutes since `REFERENCE_TIME`) of the first main sleep episode after `INCLUDE_EPISODES_LATER_THAN`.
|endtimelastmainsleep |minutes |End time (in minutes since `REFERENCE_TIME`) of the last main sleep episode after `INCLUDE_EPISODES_LATER_THAN`.
|starttimefirstnap |minutes |Start time (in minutes since `REFERENCE_TIME`) of the first nap episode after `INCLUDE_EPISODES_LATER_THAN`.
|endtimelastnap |minutes |End time (in minutes since `REFERENCE_TIME`) of the last nap episode after `INCLUDE_EPISODES_LATER_THAN`.
|starttimefirstmainsleep |minutes |Start time (in minutes since `ROUTINE_REFERENCE_TIME`) of the first main sleep episode after `LAST_NIGHT_END`.
|endtimelastmainsleep |minutes |End time (in minutes since `ROUTINE_REFERENCE_TIME`) of the last main sleep episode after `LAST_NIGHT_END`.
|starttimefirstnap |minutes |Start time (in minutes since `ROUTINE_REFERENCE_TIME`) of the first nap episode after `LAST_NIGHT_END`.
|endtimelastnap |minutes |End time (in minutes since `ROUTINE_REFERENCE_TIME`) of the last nap episode after `LAST_NIGHT_END`.
!!! note "Assumptions/Observations"
1. Deleting values from `[SLEEP_LEVELS]` or `[SLEEP_TYPES]` will only change the features you receive from `[LEVELS_AND_TYPES]`. For example if `STAGES` only contains `[rem, light]` you will not receive `countepisode[wake|deep][TYPE]` or sum, max, min, avg, median, or std `duration`. These values will not influence `RATIOS` or `ROUTINE` features.
2. Any `[LEVEL]` grouping is done within the elements of each class `CLASSIC`, `STAGES`, and `UNIFIED`. That is, we never combine `CLASSIC` or `STAGES` types to compute features when `LEVELS_AND_TYPES_COMBINING_ALL` is True or when computing `RATIOS`.
1. [This diagram](../../img/sleep_intraday_rapids.png) will help you understand how sleep episodes are chunked and grouped within time segments and `LNE-LNE` intervals for the RAPIDS provider.
1. Features listed in `[LEVELS_AND_TYPES]` are computed for any levels and types listed in `[SLEEP_LEVELS]` or `[SLEEP_TYPES]`. For example if `STAGES` only contains `[rem, light]` you will not get `countepisode[wake|deep][TYPE]` or sum, max, min, avg, median, or std `duration`. Levels or types in these lists do not influence `RATIOS` or `ROUTINE` features.
2. Any `[LEVEL]` grouping is done within the elements of each class `CLASSIC`, `STAGES`, and `UNIFIED`. That is, we never combine `CLASSIC` or `STAGES` types to compute features.
3. The categories for `all` levels (when `INCLUDE_ALL_GROUPS` is `True`) and `all` `SLEEP_TYPES` are not considered for `RATIOS` features as they are always 1.
3. These features can be computed in time segments of any length, but only the 1-minute sleep chunks within each segment instance will be used.
4. Within any time segment instance, any chunks with a local time before `LAST_NIGHT_END` will be discarded. The default `LNE` is 00:00 so no chunks are ignored.
5. `ROUTINE_REFERENCE_TIME` influences all the `[ROUTINE]` features. If `MIDNIGHT`, the reference for these times is 00:00; if `START_OF_THE_SEGMENT`, the reference time is the start of each segment instance.
## PRICE provider
!!! hint "Understanding PRICE features"
[This diagram](../../img/sleep_intraday_price.png) will help you understand how sleep episodes are chunked and grouped within time segments and `LNE-LNE` intervals for the PRICE provider.
!!! info "Available time segments"
- Available for any time segment longer than or equal to one day
@ -120,94 +131,40 @@ Parameters description for `[FITBIT_SLEEP_INTRADAY][PROVIDERS][PRICE]`:
|----------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|`[COMPUTE]` | Set to `True` to extract `FITBIT_SLEEP_INTRADAY` features from the `PRICE` provider |
|`[FEATURES]` | Features to be computed from sleep intraday data, see table below
|`[SLEEP_LEVELS]` | Fitbits sleep API Version 1 only provides `CLASSIC` records. However, Version 1.2 provides 2 types of records: `CLASSIC` and `STAGES`. `STAGES` is only available in devices with a heart rate sensor and even those devices will fail to report it if the battery is low or the device is not tight enough. While `CLASSIC` contains 3 sleep levels (`awake`, `restless`, and `asleep`), `STAGES` contains 4 sleep levels (`wake`, `deep`, `light`, `rem`). To make it consistent, RAPIDS grouped them into 2 `UNIFIED` sleep levels: `awake` (`CLASSIC`: `awake` and `restless`; `STAGES`: `wake`) and `asleep` (`CLASSIC`: `asleep`; `STAGES`: `deep`, `light`, and `rem`).
|`[SLEEP_LEVELS]` | Fitbit's sleep API Version 1 only provides `CLASSIC` records. However, Version 1.2 provides 2 types of records: `CLASSIC` and `STAGES`. `STAGES` is only available in devices with a heart rate sensor and even those devices will fail to report it if the battery is low or the device is not tight enough. While `CLASSIC` contains 3 sleep levels (`awake`, `restless`, and `asleep`), `STAGES` contains 4 sleep levels (`wake`, `deep`, `light`, `rem`). To make it consistent, RAPIDS groups them into 2 `UNIFIED` sleep levels: `awake` (`CLASSIC`: `awake` and `restless`; `STAGES`: `wake`) and `asleep` (`CLASSIC`: `asleep`; `STAGES`: `deep`, `light`, and `rem`). In this section, there is a boolean flag named `INCLUDE_ALL_GROUPS` that, if set to `True`, computes avgdurationallmain`[DAY_TYPE]` features grouping all levels together in a single `all` category.
|`[DAY_TYPES]` | The features of this provider can be computed using daily averages/standard deviations that were extracted on `WEEKEND` days only, `WEEK` days only, or `ALL` days|
|`[GROUP_EPISODES_WITHIN]` | This parameter contains 2 values: `[START_TIME]` and `[LENGTH]`. Only `main` sleep episodes that intersect or contain the period between [`START_TIME`, `START_TIME` + `LENGTH`] are taken into account to compute the features described below. Both `[START_TIME]` and `[LENGTH]` are in minutes. `[START_TIME]` is a number ranging from 0 (midnight) to 1439 (23:59) which denotes the number of minutes after midnight. `[LENGTH]` is a number smaller than 1440 (24 hours). |
|`[LAST_NIGHT_END]` | Only `main` sleep episodes that start within the `LNE-LNE` interval [`LAST_NIGHT_END`, `LAST_NIGHT_END` + 23H 59M 59S] are taken into account to compute the features described below. `[LAST_NIGHT_END]` is a number ranging from 0 (midnight) to 1439 (23:59). |
Features description for `[FITBIT_SLEEP_INTRADAY][PROVIDERS][PRICE]`:
|Feature                                                             |Units |Description |
|------------------------------------- |----------------- |-------------------------------------------------------------|
|avgduration`[LEVEL]`main`[DAY_TYPE]` |minutes | Average duration of daily `LEVEL` sleep episodes. You can include daily average that were computed on weekend days, week days or both depending on the value of the `DAY_TYPE` flag.
|avgratioduration`[LEVEL]`withinmain`[DAY_TYPE]` |- | Average ratio between daily `LEVEL` time and in-bed time inferred from `main` sleep episodes. `LEVEL` is one of `SLEEP_LEVELS` (e.g. awake-classic or rem-stages). In-bed time is the total duration of all `main` sleep episodes for each day. You can include daily ratios that were computed on weekend days, week days or both depending on the value of the `DAY_TYPE` flag.
|avgstarttimeofepisodemain`[DAY_TYPE]` |minutes | Average start time of the first `main` sleep episode of each day in a time segment. You can include daily start times from episodes detected on weekend days, week days or both depending on the value of the `DAY_TYPE` flag.
|avgendtimeofepisodemain`[DAY_TYPE]` |minutes | Average end time of the last `main` sleep episode of each day in a time segment. You can include daily end times from episodes detected on weekend days, week days or both depending on the value of the `DAY_TYPE` flag.
|avgmidpointofepisodemain`[DAY_TYPE]` |minutes | Average mid time between the start of the first `main` sleep episode and the end of the last `main` sleep episode of each day in a time segment. You can include episodes detected on weekend days, week days or both depending on the value of the `DAY_TYPE` flag.
|stdstarttimeofepisodemain`[DAY_TYPE]` |minutes | Standard deviation of start time of the first `main` sleep episode of each day in a time segment. You can include daily start times from episodes detected on weekend days, week days or both depending on the value of the `DAY_TYPE` flag.
|stdendtimeofepisodemain`[DAY_TYPE]` |minutes | Standard deviation of end time of the last `main` sleep episode of each day in a time segment. You can include daily end times from episodes detected on weekend days, week days or both depending on the value of the `DAY_TYPE` flag.
|stdmidpointofepisodemain`[DAY_TYPE]` |minutes | Standard deviation of mid time between the start of the first `main` sleep episode and the end of the last `main` sleep episode of each day in a time segment. You can include episodes detected on weekend days, week days or both depending on the value of the `DAY_TYPE` flag.
|socialjetlag |minutes | Difference in minutes between the avgmidpointofepisodemain (average mid time between bedtime and wake time) of weekends and weekdays.
|meanssdstarttimeofepisodemain |minutes squared | Same as `avgstarttimeofepisodemain[DAY_TYPE]` but the average is computed over the squared differences of each pair of consecutive start times.
|meanssdendtimeofepisodemain |minutes squared | Same as `avgendtimeofepisodemain[DAY_TYPE]` but the average is computed over the squared differences of each pair of consecutive end times.
|meanssdmidpointofepisodemain |minutes squared | Same as `avgmidpointofepisodemain[DAY_TYPE]` but the average is computed over the squared differences of each pair of consecutive mid times.
|medianssdstarttimeofepisodemain |minutes squared | Same as `avgstarttimeofepisodemain[DAY_TYPE]` but the median is computed over the squared differences of each pair of consecutive start times.
|medianssdendtimeofepisodemain |minutes squared | Same as `avgendtimeofepisodemain[DAY_TYPE]` but the median is computed over the squared differences of each pair of consecutive end times.
|medianssdmidpointofepisodemain |minutes squared | Same as `avgmidpointofepisodemain[DAY_TYPE]` but the median is computed over the squared differences of each pair of consecutive mid times.
|avgduration`[LEVEL]`main`[DAY_TYPE]` |minutes | Average duration of daily sleep chunks of a `LEVEL`. Use the `DAY_TYPE` flag to include daily durations from weekend days only, weekdays, or both. When `INCLUDE_ALL_GROUPS` is `True`, `[LEVEL]` can also be `all`, which groups all levels in a single category.
|avgratioduration`[LEVEL]`withinmain`[DAY_TYPE]` |- | Average of the daily ratio between the duration of sleep chunks of a `LEVEL` and total duration of all `main` sleep episodes in a day. When `INCLUDE_ALL_GROUPS` is `True` the `all` `LEVEL` is ignored since this feature is always 1. Use the `DAY_TYPE` flag to include daily ratios from weekend days only, weekdays, or both.
|avgstarttimeofepisodemain`[DAY_TYPE]` |minutes | Average of all start times of the first `main` sleep episode within each `LNE-LNE` interval in a time segment. Use the `DAY_TYPE` flag to include start times from `LNE-LNE` intervals that start on weekend days only, weekdays, or both.
|avgendtimeofepisodemain`[DAY_TYPE]` |minutes | Average of all end times of the last `main` sleep episode within each `LNE-LNE` interval in a time segment. Use the `DAY_TYPE` flag to include end times from `LNE-LNE` intervals that start on weekend days only, weekdays, or both.
|avgmidpointofepisodemain`[DAY_TYPE]` |minutes | Average of all the mid points between the start time of the first `main` sleep episode and the end time of the last `main` sleep episode within each `LNE-LNE` interval in a time segment. Use the `DAY_TYPE` flag to include mid points from `LNE-LNE` intervals that start on weekend days only, weekdays, or both.
|stdstarttimeofepisodemain`[DAY_TYPE]` |minutes | Standard deviation of all start times of the first `main` sleep episode within each `LNE-LNE` interval in a time segment. Use the `DAY_TYPE` flag to include start times from `LNE-LNE` intervals that start on weekend days only, weekdays, or both.
|stdendtimeofepisodemain`[DAY_TYPE]` |minutes | Standard deviation of all end times of the last `main` sleep episode within each `LNE-LNE` interval in a time segment. Use the `DAY_TYPE` flag to include end times from `LNE-LNE` intervals that start on weekend days only, weekdays, or both.
|stdmidpointofepisodemain`[DAY_TYPE]` |minutes | Standard deviation of all the mid points between the start time of the first `main` sleep episode and the end time of the last `main` sleep episode within each `LNE-LNE` interval in a time segment. Use the `DAY_TYPE` flag to include mid points from `LNE-LNE` intervals that start on weekend days only, weekdays, or both.
|socialjetlag |minutes | Difference in minutes between the avgmidpointofepisodemain of weekends and weekdays that belong to each time segment instance. If your time segment does not contain at least one week day and one weekend day this feature will be NA.
|rmssdmeanstarttimeofepisodemain |minutes | Square root of the **mean** squared successive difference (RMSSD) between today's and yesterday's `starttimeofepisodemain` values across the entire participant's sleep data grouped per time segment instance. It represents the mean of how someone's `starttimeofepisodemain` (bedtime) changed from night to night.
|rmssdmeanendtimeofepisodemain |minutes | Square root of the **mean** squared successive difference (RMSSD) between today's and yesterday's `endtimeofepisodemain` values across the entire participant's sleep data grouped per time segment instance. It represents the mean of how someone's `endtimeofepisodemain` (wake time) changed from night to night.
|rmssdmeanmidpointofepisodemain |minutes | Square root of the **mean** squared successive difference (RMSSD) between today's and yesterday's `midpointofepisodemain` values across the entire participant's sleep data grouped per time segment instance. It represents the mean of how someone's `midpointofepisodemain` (mid time between bedtime and wake time) changed from night to night.
|rmssdmedianstarttimeofepisodemain |minutes | Square root of the **median** squared successive difference (RMSSD) between today's and yesterday's `starttimeofepisodemain` values across the entire participant's sleep data grouped per time segment instance. It represents the median of how someone's `starttimeofepisodemain` (bedtime) changed from night to night.
|rmssdmedianendtimeofepisodemain |minutes | Square root of the **median** squared successive difference (RMSSD) between today's and yesterday's `endtimeofepisodemain` values across the entire participant's sleep data grouped per time segment instance. It represents the median of how someone's `endtimeofepisodemain` (wake time) changed from night to night.
|rmssdmedianmidpointofepisodemain |minutes | Square root of the **median** squared successive difference (RMSSD) between today's and yesterday's `midpointofepisodemain` values across the entire participant's sleep data grouped per time segment instance. It represents the median of how someone's `midpointofepisodemain` (average mid time between bedtime and wake time) changed from night to night.
!!! note "Assumptions/Observations"
1. These features are based on descriptive statistics computed across daily values (start/end/mid times of sleep episodes). This is the reason why they are only available on time segments that are longer than 24 hours (we need at least 1 day to get the average).
1. [This diagram](../../img/sleep_intraday_price.png) will help you understand how sleep episodes are chunked and grouped within time segments and `LNE-LNE` intervals for the PRICE provider.
1. We recommend you use periodic segments that start in the morning so RAPIDS can chunk and group sleep episodes overnight. Shifted segments (as any other segments) are labelled based on their start and end date times.
5. `avgstarttime...` and `avgendtime...` are roughly equivalent to an average bed and awake time only if you are using shifted segments.
1. The features of this provider are only available on time segments that are longer than 24 hours because they are based on descriptive statistics computed across daily values.
2. Even though Fitbit provides 2 types of sleep episodes (`main` and `nap`), only `main` sleep episodes are considered.
3. How do we assign sleep episodes to specific dates?
`START_TIME` and `LENGTH` control the dates that sleep episodes belong to. For a pair of `[START_TIME]` and `[LENGTH]`, sleep episodes (blue boxes) can only be placed at the following places:
<figure>
<img src="../../img/features_fitbit_sleep_intraday.png" max-width="100%" />
<figcaption>Relationship between sleep episodes and the given times`([START_TIME], [LENGTH])`</figcaption>
</figure>
- If the end time of a sleep episode is before `[START_TIME]`, it will belong to the day before its start date (e.g. sleep episode #1).
- if (1) the start time or the end time of a sleep episode are between (overlap) `[START_TIME]` and `[START_TIME] + [LENGTH]` or (2) the start time is before `[START_TIME]` and the end time is after `[START_TIME] + [LENGTH]`, it will belong to its start date (e.g. sleep episode #2, #3, #4, #5).
- If the start time of a sleep episode is after `[START_TIME] + [LENGTH]`, it will belong to the day after its start date (e.g. sleep episode #6).
Only `main` sleep episodes that intersect or contain the period between `[START_TIME]` and `[START_TIME] + [LENGTH]` will be included in the feature computation. If we process the following `main` sleep episodes:
| episode |start|end|
|-|-|-|
|1|2021-02-01 12:00|2021-02-01 15:00|
|2|2021-02-01 21:00|2021-02-02 03:00|02-01
|3|2021-02-02 05:00|2021-02-02 08:00|02-01
|4|2021-02-02 11:00|2021-02-02 14:00|
|5|2021-02-02 19:00|2021-02-03 06:00|02-02
And our parameters:
- `[INCLUDE_EPISODES_INTERSECTING][START_TIME]` = 1320 (today's 22:00)
- `[INCLUDE_EPISODES_INTERSECTING][LENGTH]` = 720 (tomorrow's 10:00, or 22:00 + 12 hours)
Only sleep episodes 2, 3, and 5 would be considered.
4. Time related features represent the number of minutes between the start/end/midpoint of sleep episodes and the assigned day's midnight.
5. All `main` sleep episodes are chunked within the requested [time segments](../../setup/configuration/#time-segments) which need to be at least 24 hours or more long (1, 2, 3, 7 days, etc.). Then, daily features will be extracted and averaged across the length of the time segment, for example:
The daily features extracted on 2021-02-01 will be:
- starttimeofepisodemain (bedtime) is `21 * 60` (episode 2 start time 2021-02-01 21:00)
- endtimeofepisodemain (wake time) is `32 * 60 `(episode 3 end time 2021-02-02 08:00 + 24)
- midpointofepisodemain (midpoint sleep) is `[(21 * 60) + (32 * 60)] / 2`
The daily features extracted on 2021-02-02 will be:
- starttimeofepisodemain (bedtime) is `19 * 60` (episode 5 start time 2021-02-02 19:00)
- endtimeofepisodemain (wake time) is `30 * 60 `(episode 5 end time 2021-02-03 06:00 + 24)
- midpointofepisodemain (midpoint sleep) is `[(19 * 60) + (30 * 60)] / 2`
And `avgstarttimeofepisodemain[DAY_TYPE]` will be `([21 * 60] + [19 * 60]) / 2`
4. The reference point for all times is 00:00 of the first day in the LNE-LNE interval.
5. Sleep episodes are formed by 1-minute chunks that we group overnight starting from today's LNE and ending on tomorrow's LNE or the end of that segment (whichever is first).
5. The features `avgstarttime...` and `avgendtime...` are the average of the first and last sleep episode across every LNE-LNE interval within a segment (`avgmidpoint...` is the mid point between start and end). Therefore, only segments longer than 24hrs will be averaged across more than one LNE-LNE interval.
5. `socialjetlag` is only available on segment instances equal to or longer than 48hrs that contain at least one weekday and one weekend day, for example seven-day (weekly) segments.

View File

@ -9,6 +9,9 @@ Sensor parameters description for `[FITBIT_SLEEP_SUMMARY]`:
## RAPIDS provider
!!! hint "Understanding RAPIDS features"
[This diagram](../../img/sleep_summary_rapids.png) will help you understand how sleep episodes are chunked and grouped within time segments using `SLEEP_SUMMARY_LAST_NIGHT_END` for the RAPIDS provider.
!!! info "Available time segments"
- Only available for segments that span 1 or more complete days (e.g. Jan 1st 00:00 to Jan 3rd 23:59)
@ -26,14 +29,19 @@ Parameters description for `[FITBIT_SLEEP_SUMMARY][PROVIDERS][RAPIDS]`:
|Key&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; | Description |
|----------------|-----------------------------------------------------------------------------------------------------------------------------------
|`[COMPUTE]` | Set to `True` to extract `FITBIT_SLEEP_SUMMARY` features from the `RAPIDS` provider |
|`[SLEEP_TYPES]` | Types of sleep to be included in the feature extraction computation. Fitbit provides 3 types of sleep: `main`, `nap`, `all`. |
|`[SLEEP_TYPES]` | Types of sleep to be included in the feature extraction computation. There are three sleep types: `main`, `nap`, and `all`. The `all` type means both main sleep and naps are considered. |
|`[FEATURES]` | Features to be computed from sleep summary data, see table below |
|`[FITBIT_DATA_STREAMS][data stream][SLEEP_SUMMARY_LAST_NIGHT_END]` | As an exception, the `LAST_NIGHT_END` parameter for this provider is in the data stream configuration section. This parameter controls how sleep episodes are assigned to different days and affects wake and bedtimes.|
Features description for `[FITBIT_SLEEP_SUMMARY][PROVIDERS][RAPIDS]`:
|Feature |Units |Description |
|------------------------------ |---------- |-------------------------------------------- |
|firstwaketimeTYPE |minutes |First wake time for a certain sleep type during a time segment. Wake time is number of minutes after midnight of a sleep episode's end time.
|lastwaketimeTYPE |minutes |Last wake time for a certain sleep type during a time segment. Wake time is number of minutes after midnight of a sleep episode's end time.
|firstbedtimeTYPE |minutes |First bedtime for a certain sleep type during a time segment. Bedtime is number of minutes after midnight of a sleep episode's start time.
|lastbedtimeTYPE |minutes |Last bedtime for a certain sleep type during a time segment. Bedtime is number of minutes after midnight of a sleep episode's start time.
|countepisodeTYPE |episodes |Number of sleep episodes for a certain sleep type during a time segment.
|avgefficiencyTYPE |scores |Average sleep efficiency for a certain sleep type during a time segment.
|sumdurationafterwakeupTYPE |minutes |Total duration the user stayed in bed after waking up for a certain sleep type during a time segment.
@ -50,10 +58,13 @@ Features description for `[FITBIT_SLEEP_SUMMARY][PROVIDERS][RAPIDS]`:
!!! note "Assumptions/Observations"
1. There are three sleep types (TYPE): `main`, `nap`, `all`. The `all` type contains both main sleep and naps.
1. [This diagram](../../img/sleep_summary_rapids.png) will help you understand how sleep episodes are chunked and grouped within time segments using `LNE` for the RAPIDS provider.
1. There are three sleep types (TYPE): `main`, `nap`, `all`. The `all` type groups both `main` sleep and `naps`. All types are based on Fitbit's labels.
2. There are two versions of Fitbit's sleep API ([version 1](https://dev.fitbit.com/build/reference/web-api/sleep-v1/) and [version 1.2](https://dev.fitbit.com/build/reference/web-api/sleep/)), and each provides raw sleep data in a different format:
- _Count & duration summaries_. `v1` contains `count_awake`, `duration_awake`, `count_awakenings`, `count_restless`, and `duration_restless` fields for every sleep record but `v1.2` does not.
3. _API columns_. Features are computed based on the values provided by Fitbit's API: `efficiency`, `minutes_after_wakeup`, `minutes_asleep`, `minutes_awake`, `minutes_to_fall_asleep`, `minutes_in_bed`, `is_main_sleep` and `type`.
3. _API columns_. Most features are computed based on the values provided by Fitbit's API: `efficiency`, `minutes_after_wakeup`, `minutes_asleep`, `minutes_awake`, `minutes_to_fall_asleep`, `minutes_in_bed`, `is_main_sleep` and `type`.
4. Bed time and sleep duration are based on episodes that started between today's LNE and tomorrow's LNE, while awake time is based on the episodes that started between yesterday's LNE and today's LNE.
5. The reference point for bed/awake times is today's 00:00. You can have bedtimes larger than 24 and awake times smaller than 0.
6. These features are only available for time segments that span midnight to midnight of the same or different day.
7. We include first and last wake and bedtimes because, when `LAST_NIGHT_END` is 10 am, the first bedtime could match a nap at 2 pm, and the last bedtime could match a main overnight sleep episode that starts at 10pm.
5. Set the value for `SLEEP_SUMMARY_LAST_NIGHT_END` in the config parameter `[FITBIT_DATA_STREAMS][data stream][SLEEP_SUMMARY_LAST_NIGHT_END]`.

Binary file not shown.

After

Width:  |  Height:  |  Size: 840 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 426 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 294 KiB

View File

@ -500,19 +500,19 @@ Modify the following keys in your `config.yaml` depending on the [data stream](.
# AVAILABLE:
fitbitjson_mysql:
DATABASE_GROUP: MY_GROUP
SLEEP_SUMMARY_EPISODE_DAY_ANCHOR: False
SLEEP_SUMMARY_LAST_NIGHT_END: 660
fitbitjson_csv:
FOLDER: data/external/fitbit_csv
SLEEP_SUMMARY_EPISODE_DAY_ANCHOR: False
SLEEP_SUMMARY_LAST_NIGHT_END: 660
fitbitparsed_mysql:
DATABASE_GROUP: MY_GROUP
SLEEP_SUMMARY_EPISODE_DAY_ANCHOR: False
SLEEP_SUMMARY_LAST_NIGHT_END: 660
fitbitparsed_csv:
FOLDER: data/external/fitbit_csv
SLEEP_SUMMARY_EPISODE_DAY_ANCHOR: False
SLEEP_SUMMARY_LAST_NIGHT_END: 660
```
@ -524,7 +524,7 @@ Modify the following keys in your `config.yaml` depending on the [data stream](.
| Key | Description |
|---------------------|----------------------------------------------------------------------------------------------------------------------------|
| `[DATABASE_GROUP]` | A database credentials group. Read the instructions below to set it up |
| `[SLEEP_SUMMARY_EPISODE_DAY_ANCHOR]` | One of `start` or `end`. Summary sleep episodes are considered as events based on either the start timestamp or end timestamp (they will belong to the day where they start or end). |
| `[SLEEP_SUMMARY_LAST_NIGHT_END]` | Segments are assigned based on this parameter. Any sleep episode that starts between today's SLEEP_SUMMARY_LAST_NIGHT_END (LNE) and tomorrow's LNE is regarded as today's sleep episode. While today's bedtime is based on today's sleep episodes, today's wake time is based on yesterday's sleep episodes. |
--8<---- "docs/snippets/database.md"
@ -535,7 +535,7 @@ Modify the following keys in your `config.yaml` depending on the [data stream](.
| Key | Description |
|---------------------|----------------------------------------------------------------------------------------------------------------------------|
| `[FOLDER]` | Folder where you have to place a CSV file **per** Fitbit sensor. Each file has to contain all the data from every participant you want to process. |
| `[SLEEP_SUMMARY_EPISODE_DAY_ANCHOR]` | One of `start` or `end`. Summary sleep episodes are considered as events based on either the start timestamp or end timestamp (they will belong to the day where they start or end). |
| `[SLEEP_SUMMARY_LAST_NIGHT_END]` | Segments are assigned based on this parameter. Any sleep episode that starts between today's SLEEP_SUMMARY_LAST_NIGHT_END (LNE) and tomorrow's LNE is regarded as today's sleep episode. While today's bedtime is based on today's sleep episodes, today's wake time is based on yesterday's sleep episodes. |
=== "fitbitparsed_mysql"
@ -546,7 +546,7 @@ Modify the following keys in your `config.yaml` depending on the [data stream](.
| Key | Description |
|---------------------|----------------------------------------------------------------------------------------------------------------------------|
| `[DATABASE_GROUP]` | A database credentials group. Read the instructions below to set it up |
| `[SLEEP_SUMMARY_EPISODE_DAY_ANCHOR]` | One of `start` or `end`. Summary sleep episodes are considered as events based on either the start timestamp or end timestamp (they will belong to the day where they start or end). |
| `[SLEEP_SUMMARY_LAST_NIGHT_END]` | Segments are assigned based on this parameter. Any sleep episode that starts between today's SLEEP_SUMMARY_LAST_NIGHT_END (LNE) and tomorrow's LNE is regarded as today's sleep episode. While today's bedtime is based on today's sleep episodes, today's wake time is based on yesterday's sleep episodes. |
--8<---- "docs/snippets/database.md"
@ -557,7 +557,7 @@ Modify the following keys in your `config.yaml` depending on the [data stream](.
| Key | Description |
|---------------------|----------------------------------------------------------------------------------------------------------------------------|
| `[FOLDER]` | Folder where you have to place a CSV file **per** Fitbit sensor. Each file has to contain all the data from every participant you want to process. |
| `[SLEEP_SUMMARY_EPISODE_DAY_ANCHOR]` | One of `start` or `end`. Summary sleep episodes are considered as events based on either the start timestamp or end timestamp (they will belong to the day where they start or end). |
| `[SLEEP_SUMMARY_LAST_NIGHT_END]` | Segments are assigned based on this parameter. Any sleep episode that starts between today's SLEEP_SUMMARY_LAST_NIGHT_END (LNE) and tomorrow's LNE is regarded as today's sleep episode. While today's bedtime is based on today's sleep episodes, today's wake time is based on yesterday's sleep episodes. |
=== "Empatica"

View File

@ -58,15 +58,18 @@ def parseSleepData(sleep_data):
# Parse raw Fitbit sleep-summary JSON and attach the date/time columns expected downstream.
# json_raw: raw sleep records, passed straight to parseSleepData.
# stream_parameters: data stream configuration; the keys read here are
#   "SLEEP_SUMMARY_LAST_NIGHT_END" (minutes) and "SLEEP_SUMMARY_EPISODE_DAY_ANCHOR".
# Returns the parsed DataFrame with local_date_time, timestamp and string-formatted start/end times.
def main(json_raw, stream_parameters):
parsed_data = parseSleepData(json_raw)
# Shift every episode start back by SLEEP_SUMMARY_LAST_NIGHT_END minutes and keep only the
# resulting date (time part fixed to 00:00:00), so episodes starting before that cut-off
# are labelled with the previous day.
parsed_data["local_date_time"] = (parsed_data["local_start_date_time"] - pd.Timedelta(minutes=stream_parameters["SLEEP_SUMMARY_LAST_NIGHT_END"])).dt.strftime('%Y-%m-%d 00:00:00')
# complete missing dates: every date between the earliest and latest local_date_time that
# has no episode gets one placeholder row (all other columns empty)
missed_dates = list(set([x.strftime('%Y-%m-%d 00:00:00') for x in pd.date_range(parsed_data["local_date_time"].min(), parsed_data["local_date_time"].max()).to_pydatetime()]) - set(parsed_data["local_date_time"]))
parsed_data = pd.concat([parsed_data, pd.DataFrame({"local_date_time": missed_dates})], axis=0)
parsed_data.sort_values(by=["local_date_time", "local_start_date_time"], inplace=True)
# forward-fill device_id so the placeholder rows take the value of the preceding row
parsed_data["device_id"] = parsed_data["device_id"].interpolate(method="pad")
parsed_data["timestamp"] = 0 # this column is added at readable_datetime.R because we need to take into account multiple timezones
# Serialise start/end times as strings, but only when the columns are still datetime typed
if pd.api.types.is_datetime64_any_dtype( parsed_data['local_start_date_time']):
parsed_data['local_start_date_time'] = parsed_data['local_start_date_time'].dt.strftime('%Y-%m-%d %H:%M:%S')
if pd.api.types.is_datetime64_any_dtype( parsed_data['local_end_date_time']):
parsed_data['local_end_date_time'] = parsed_data['local_end_date_time'].dt.strftime('%Y-%m-%d %H:%M:%S')
# NOTE(review): the branch below reassigns local_date_time, replacing the
# SLEEP_SUMMARY_LAST_NIGHT_END-based value computed at the top of this function.
# Confirm which of the two assignments is the intended one.
if stream_parameters["SLEEP_SUMMARY_EPISODE_DAY_ANCHOR"] == "start":
parsed_data["local_date_time"] = parsed_data['local_start_date_time']
else:
parsed_data["local_date_time"] = parsed_data['local_end_date_time']
return(parsed_data)

View File

@ -3,6 +3,15 @@ source("renv/activate.R")
library(yaml)
library(dplyr)
library(readr)
# Collapse a list column into a character vector: string elements are kept as they are
# and NaN elements become NA_character_. Any element that is neither a string nor NaN
# aborts with an error, since only string columns containing np.nan are handled.
fix_pandas_nan_in_string_columns <- function(column){
  string_or_na <- function(element) {
    if(!(is.character(element) || is.nan(element)))
      stop("The reticulate conversion from the python mutation script to r failed. One or more returned columns are a list with unsupported mixed types. We only handle string columns with np.nan values. Open a GitHub issue or fix the mutation script")
    ifelse(is.nan(element), NA_character_, element)
  }
  # vapply enforces that every element maps to exactly one string
  vapply(column, string_or_na, FUN.VALUE = character(1))
}
# we use reticulate but only load it if we are going to use it to minimize the case when old RAPIDS deployments need to update their renv
mutate_data <- function(scripts, data, data_configuration){
for(script in scripts){
@ -25,6 +34,7 @@ mutate_data <- function(scripts, data, data_configuration){
if(py_has_attr(script_functions, "main")){
message(paste("Applying mutation script", script))
data <- script_functions$main(data, data_configuration)
data <- data %>% mutate(across(where(is.list), fix_pandas_nan_in_string_columns))
} else{
stop(paste0("The following mutation script does not have a main function: ", script))
}

View File

@ -3,7 +3,7 @@ import itertools
def featuresFullNames(intraday_features_to_compute, sleep_levels_to_compute, day_types_to_compute):
def featuresFullNames(intraday_features_to_compute, sleep_levels_to_compute, day_types_to_compute, levels_include_all_groups):
features_fullnames = ["local_segment"]
@ -14,7 +14,7 @@ def featuresFullNames(intraday_features_to_compute, sleep_levels_to_compute, day
for feature in intraday_features_to_compute:
if feature == "avgduration":
features_fullnames.extend(["avgduration" + x[0] + "main" + x[1].lower() for x in itertools.product(sleep_level_with_group, day_types_to_compute)])
features_fullnames.extend(["avgduration" + x[0] + "main" + x[1].lower() for x in itertools.product(sleep_level_with_group + (["all"] if levels_include_all_groups else []), day_types_to_compute)])
elif feature == "avgratioduration":
features_fullnames.extend(["avgratioduration" + x[0] + "withinmain" + x[1].lower() for x in itertools.product(sleep_level_with_group, day_types_to_compute)])
elif feature in ["avgstarttimeofepisodemain", "avgendtimeofepisodemain", "avgmidpointofepisodemain", "stdstarttimeofepisodemain", "stdendtimeofepisodemain", "stdmidpointofepisodemain"]:
@ -69,7 +69,7 @@ def extractDailyFeatures(sleep_data):
return daily_features
def statsOfDailyFeatures(daily_features, day_type, sleep_levels, intraday_features_to_compute, sleep_intraday_features):
def statsOfDailyFeatures(daily_features, day_type, sleep_levels, intraday_features_to_compute, sleep_intraday_features, levels_include_all_groups):
if day_type == "WEEKEND":
daily_features = daily_features[daily_features["is_weekend"] == 0]
elif day_type == "WEEK":
@ -110,6 +110,8 @@ def statsOfDailyFeatures(daily_features, day_type, sleep_levels, intraday_featur
if "avgratioduration" in intraday_features_to_compute:
col = "ratioduration" + sleep_level + sleep_level_group.lower() + "withinmain"
sleep_intraday_features = pd.concat([sleep_intraday_features, daily_features[["local_segment", col]].groupby("local_segment")[col].mean().to_frame().rename(columns={col: "avg" + col + day_type.lower()})], axis=1)
if levels_include_all_groups and ("avgduration" in intraday_features_to_compute):
sleep_intraday_features = pd.concat([sleep_intraday_features, daily_features[["local_segment", "durationinbedmain"]].groupby("local_segment")["durationinbedmain"].mean().to_frame().rename(columns={"durationinbedmain": "avgdurationallmain" + day_type.lower()})], axis=1)
return sleep_intraday_features
@ -127,28 +129,28 @@ def socialJetLagFeature(daily_features, sleep_intraday_features):
return sleep_intraday_features
# Successive-difference features of the main sleep episode's start, end and midpoint times.
# daily_features: one row per day, read here through the columns fake_date, local_segment,
#   starttimeofepisodemain, endtimeofepisodemain and midpointofepisodemain.
# intraday_features_to_compute: names of the requested features; only matching ones are added.
# sleep_intraday_features: DataFrame the new feature columns are concatenated onto (axis=1).
# Returns sleep_intraday_features with the requested columns appended.
# NOTE(review): two consecutive def lines (MSSDFeatures, RMSSDFeatures) appear here — confirm
# which name callers are expected to use.
def MSSDFeatures(daily_features, intraday_features_to_compute, sleep_intraday_features):
def RMSSDFeatures(daily_features, intraday_features_to_compute, sleep_intraday_features):
# Build one row per calendar day between the first and last fake_date, then right-merge so
# that days without data still appear as rows (their feature values are missing).
date_idx = pd.DataFrame(pd.date_range(start=daily_features["fake_date"].min(), end=daily_features["fake_date"].max(), freq="D"), columns=["fake_date"])
date_idx["fake_date"] = date_idx["fake_date"].dt.date
daily_features = daily_features.merge(date_idx, on="fake_date", how="right")
for col in ["starttimeofepisodemain", "endtimeofepisodemain", "midpointofepisodemain"]:
# row-to-row difference of the daily value
daily_features[col + "_diff"] = daily_features[col].diff()
# squared row-to-row difference; this assignment overwrites the one directly above
daily_features[col + "_diff"] = daily_features[col].diff().pow(2)
# meanssd*: per-segment mean of the *_diff column
if "meanssdstarttimeofepisodemain" in intraday_features_to_compute:
sleep_intraday_features = pd.concat([sleep_intraday_features, daily_features[["local_segment","starttimeofepisodemain_diff"]].groupby("local_segment")["starttimeofepisodemain_diff"].mean().to_frame().rename(columns={"starttimeofepisodemain_diff": "meanssdstarttimeofepisodemain"})], axis=1)
if "meanssdendtimeofepisodemain" in intraday_features_to_compute:
sleep_intraday_features = pd.concat([sleep_intraday_features, daily_features[["local_segment","endtimeofepisodemain_diff"]].groupby("local_segment")["endtimeofepisodemain_diff"].mean().to_frame().rename(columns={"endtimeofepisodemain_diff": "meanssdendtimeofepisodemain"})], axis=1)
if "meanssdmidpointofepisodemain" in intraday_features_to_compute:
sleep_intraday_features = pd.concat([sleep_intraday_features, daily_features[["local_segment","midpointofepisodemain_diff"]].groupby("local_segment")["midpointofepisodemain_diff"].mean().to_frame().rename(columns={"midpointofepisodemain_diff": "meanssdmidpointofepisodemain"})], axis=1)
# rmssdmean*: square root of the per-segment mean of the *_diff column
if "rmssdmeanstarttimeofepisodemain" in intraday_features_to_compute:
sleep_intraday_features = pd.concat([sleep_intraday_features, daily_features[["local_segment","starttimeofepisodemain_diff"]].groupby("local_segment")["starttimeofepisodemain_diff"].mean().pow(0.5).to_frame().rename(columns={"starttimeofepisodemain_diff": "rmssdmeanstarttimeofepisodemain"})], axis=1)
if "rmssdmeanendtimeofepisodemain" in intraday_features_to_compute:
sleep_intraday_features = pd.concat([sleep_intraday_features, daily_features[["local_segment","endtimeofepisodemain_diff"]].groupby("local_segment")["endtimeofepisodemain_diff"].mean().pow(0.5).to_frame().rename(columns={"endtimeofepisodemain_diff": "rmssdmeanendtimeofepisodemain"})], axis=1)
if "rmssdmeanmidpointofepisodemain" in intraday_features_to_compute:
sleep_intraday_features = pd.concat([sleep_intraday_features, daily_features[["local_segment","midpointofepisodemain_diff"]].groupby("local_segment")["midpointofepisodemain_diff"].mean().pow(0.5).to_frame().rename(columns={"midpointofepisodemain_diff": "rmssdmeanmidpointofepisodemain"})], axis=1)
# medianssd*: per-segment median of the *_diff column
if "medianssdstarttimeofepisodemain" in intraday_features_to_compute:
sleep_intraday_features = pd.concat([sleep_intraday_features, daily_features[["local_segment","starttimeofepisodemain_diff"]].groupby("local_segment")["starttimeofepisodemain_diff"].median().to_frame().rename(columns={"starttimeofepisodemain_diff": "medianssdstarttimeofepisodemain"})], axis=1)
if "medianssdendtimeofepisodemain" in intraday_features_to_compute:
sleep_intraday_features = pd.concat([sleep_intraday_features, daily_features[["local_segment","endtimeofepisodemain_diff"]].groupby("local_segment")["endtimeofepisodemain_diff"].median().to_frame().rename(columns={"endtimeofepisodemain_diff": "medianssdendtimeofepisodemain"})], axis=1)
if "medianssdmidpointofepisodemain" in intraday_features_to_compute:
sleep_intraday_features = pd.concat([sleep_intraday_features, daily_features[["local_segment","midpointofepisodemain_diff"]].groupby("local_segment")["midpointofepisodemain_diff"].median().to_frame().rename(columns={"midpointofepisodemain_diff": "medianssdmidpointofepisodemain"})], axis=1)
# rmssdmedian*: square root of the per-segment median of the *_diff column
if "rmssdmedianstarttimeofepisodemain" in intraday_features_to_compute:
sleep_intraday_features = pd.concat([sleep_intraday_features, daily_features[["local_segment","starttimeofepisodemain_diff"]].groupby("local_segment")["starttimeofepisodemain_diff"].median().pow(0.5).to_frame().rename(columns={"starttimeofepisodemain_diff": "rmssdmedianstarttimeofepisodemain"})], axis=1)
if "rmssdmedianendtimeofepisodemain" in intraday_features_to_compute:
sleep_intraday_features = pd.concat([sleep_intraday_features, daily_features[["local_segment","endtimeofepisodemain_diff"]].groupby("local_segment")["endtimeofepisodemain_diff"].median().pow(0.5).to_frame().rename(columns={"endtimeofepisodemain_diff": "rmssdmedianendtimeofepisodemain"})], axis=1)
if "rmssdmedianmidpointofepisodemain" in intraday_features_to_compute:
sleep_intraday_features = pd.concat([sleep_intraday_features, daily_features[["local_segment","midpointofepisodemain_diff"]].groupby("local_segment")["midpointofepisodemain_diff"].median().pow(0.5).to_frame().rename(columns={"midpointofepisodemain_diff": "rmssdmedianmidpointofepisodemain"})], axis=1)
return sleep_intraday_features
@ -157,16 +159,16 @@ def MSSDFeatures(daily_features, intraday_features_to_compute, sleep_intraday_fe
def price_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs):
daily_start_time = provider["GROUP_EPISODES_WITHIN"]["START_TIME"]
daily_end_time = daily_start_time + provider["GROUP_EPISODES_WITHIN"]["LENGTH"]
last_night_end = provider["LAST_NIGHT_END"]
sleep_intraday_data = pd.read_csv(sensor_data_files["sensor_data"])
requested_intraday_features = provider["FEATURES"]
levels_include_all_groups = provider["SLEEP_LEVELS"]["INCLUDE_ALL_GROUPS"]
requested_sleep_levels = provider["SLEEP_LEVELS"]
requested_day_types = provider["DAY_TYPES"]
# Name of the features this function can compute
base_intraday_features = ["avgduration", "avgratioduration", "avgstarttimeofepisodemain", "avgendtimeofepisodemain", "avgmidpointofepisodemain", "stdstarttimeofepisodemain", "stdendtimeofepisodemain", "stdmidpointofepisodemain", "socialjetlag", "meanssdstarttimeofepisodemain", "meanssdendtimeofepisodemain", "meanssdmidpointofepisodemain", "medianssdstarttimeofepisodemain", "medianssdendtimeofepisodemain", "medianssdmidpointofepisodemain"]
base_intraday_features = ["avgduration", "avgratioduration", "avgstarttimeofepisodemain", "avgendtimeofepisodemain", "avgmidpointofepisodemain", "stdstarttimeofepisodemain", "stdendtimeofepisodemain", "stdmidpointofepisodemain", "socialjetlag", "rmssdmeanstarttimeofepisodemain", "rmssdmeanendtimeofepisodemain", "rmssdmeanmidpointofepisodemain", "rmssdmedianstarttimeofepisodemain", "rmssdmedianendtimeofepisodemain", "rmssdmedianmidpointofepisodemain"]
base_sleep_levels = {"CLASSIC": ["awake", "restless", "asleep"],
"STAGES": ["wake", "deep", "light", "rem"],
"UNIFIED": ["awake", "asleep"]}
@ -178,7 +180,7 @@ def price_features(sensor_data_files, time_segment, provider, filter_data_by_seg
day_types_to_compute = list(set(requested_day_types) & set(base_day_types))
# Full names
features_fullnames = featuresFullNames(intraday_features_to_compute, sleep_levels_to_compute, day_types_to_compute)
features_fullnames = featuresFullNames(intraday_features_to_compute, sleep_levels_to_compute, day_types_to_compute, levels_include_all_groups)
sleep_intraday_features = pd.DataFrame(columns=features_fullnames)
# Filter by segments and chunk episodes
@ -206,26 +208,18 @@ def price_features(sensor_data_files, time_segment, provider, filter_data_by_seg
main_sleep_episodes["end_minutes"] = main_sleep_episodes["start_minutes"] + main_sleep_episodes["durationinbed"]
# Extract fake date
""" The rule used for fake date extraction
set DS = daily_start_time, DE = daily_end_time
set start = start_minutes, end = end_minutes
if (DS <= start < DE) or (DS < end <= DE) or (start <= DS and end >= DE):
if start_minutes >= last_night_end
assign today
elif if end <= DS:
else:
assign yesterday
else: (same as start >=DE)
assign tomorrow
"""
main_sleep_episodes["fake_date_delta"] = main_sleep_episodes[["start_minutes", "end_minutes"]].apply(lambda row: 0 if ((row["start_minutes"] >= daily_start_time and row["start_minutes"] < daily_end_time) or (row["end_minutes"] > daily_start_time and row["end_minutes"] <= daily_end_time) or (row["start_minutes"] <= daily_start_time and row["end_minutes"] >= daily_end_time)) else -1 if (row["end_minutes"] <= daily_start_time) else 1, axis=1)
main_sleep_episodes["fake_date_delta"] = main_sleep_episodes[["start_minutes"]].apply(lambda row: 0 if row["start_minutes"] >= last_night_end else -1, axis=1)
main_sleep_episodes["fake_date"] = (main_sleep_episodes["local_start_date_time"] + pd.to_timedelta(main_sleep_episodes["fake_date_delta"], unit="d")).dt.date
# Update "start_minutes" column based on START_TIME
main_sleep_episodes["start_minutes"] = main_sleep_episodes[["start_minutes", "fake_date_delta"]].apply(lambda row: row["start_minutes"] - 24 * 60 * row["fake_date_delta"], axis=1)
main_sleep_episodes["end_minutes"] = main_sleep_episodes["start_minutes"] + main_sleep_episodes["durationinbed"]
# We keep a sleep episode that intersects or contains the period between [START_TIME, START_TIME + LENGTH], aka [daily_start_time, daily_end_time].
main_sleep_episodes = main_sleep_episodes.query("(start_minutes >= @daily_start_time and start_minutes < @daily_end_time) or (end_minutes > @daily_start_time and end_minutes <= @daily_end_time) or (start_minutes <= @daily_start_time and end_minutes >= @daily_end_time)")
# Sort main sleep episodes based on fake_date and start_minutes
main_sleep_episodes = main_sleep_episodes.sort_values(["fake_date", "start_minutes"])
# Extract daily features
@ -233,10 +227,10 @@ def price_features(sensor_data_files, time_segment, provider, filter_data_by_seg
# Extract features per segment based on daily features
for day_type in day_types_to_compute:
sleep_intraday_features = statsOfDailyFeatures(daily_features, day_type, sleep_levels_to_compute, intraday_features_to_compute, sleep_intraday_features)
sleep_intraday_features = statsOfDailyFeatures(daily_features, day_type, sleep_levels_to_compute, intraday_features_to_compute, sleep_intraday_features, levels_include_all_groups)
if "socialjetlag" in intraday_features_to_compute:
sleep_intraday_features = socialJetLagFeature(daily_features, sleep_intraday_features)
sleep_intraday_features = MSSDFeatures(daily_features, intraday_features_to_compute, sleep_intraday_features)
sleep_intraday_features = RMSSDFeatures(daily_features, intraday_features_to_compute, sleep_intraday_features)
sleep_intraday_features.index.name = "local_segment"
sleep_intraday_features.reset_index(inplace=True)

View File

@ -2,7 +2,7 @@ import pandas as pd
from datetime import datetime
import itertools
def featuresFullNames(intraday_features_to_compute, sleep_levels_to_compute, sleep_types_to_compute, consider_all):
def featuresFullNames(intraday_features_to_compute, sleep_levels_to_compute, sleep_types_to_compute, levels_include_all_groups):
features_fullname = ["local_segment"]
@ -11,8 +11,8 @@ def featuresFullNames(intraday_features_to_compute, sleep_levels_to_compute, sle
for sleep_level in sleep_levels_to_compute[sleep_level_group]:
sleep_level_with_group.append(sleep_level + sleep_level_group.lower())
if consider_all:
features_fullname.extend([x[0] + x[1] + x[2] for x in itertools.product(intraday_features_to_compute["LEVELS_AND_TYPES"], sleep_level_with_group + ["all"], sleep_types_to_compute + ["all"])])
if levels_include_all_groups:
features_fullname.extend([x[0] + x[1] + x[2] for x in itertools.product(intraday_features_to_compute["LEVELS_AND_TYPES"], sleep_level_with_group + ["all"], sleep_types_to_compute)])
else:
features_fullname.extend([x[0] + x[1] + x[2] for x in itertools.product(intraday_features_to_compute["LEVELS_AND_TYPES"], sleep_level_with_group, sleep_types_to_compute)])
if "ACROSS_LEVELS" in intraday_features_to_compute["RATIOS_SCOPE"]:
@ -20,9 +20,9 @@ def featuresFullNames(intraday_features_to_compute, sleep_levels_to_compute, sle
if "ACROSS_TYPES" in intraday_features_to_compute["RATIOS_SCOPE"] and "main" in sleep_types_to_compute:
features_fullname.extend(["ratio" + x + "main" for x in intraday_features_to_compute["RATIOS_TYPE"]])
if "WITHIN_LEVELS" in intraday_features_to_compute["RATIOS_SCOPE"]:
features_fullname.extend(["ratio" + x[0] + x[1] + "within" + x[2] for x in itertools.product(intraday_features_to_compute["RATIOS_TYPE"], sleep_types_to_compute, sleep_level_with_group)])
features_fullname.extend(["ratio" + x[0] + x[1] + "within" + x[2] for x in itertools.product(intraday_features_to_compute["RATIOS_TYPE"], set(sleep_types_to_compute) & set(["main", "nap"]), sleep_level_with_group)])
if "WITHIN_TYPES" in intraday_features_to_compute["RATIOS_SCOPE"]:
features_fullname.extend(["ratio" + x[0] + x[1] + "within" + x[2] for x in itertools.product(intraday_features_to_compute["RATIOS_TYPE"], sleep_level_with_group, sleep_types_to_compute)])
features_fullname.extend(["ratio" + x[0] + x[1] + "within" + x[2] for x in itertools.product(intraday_features_to_compute["RATIOS_TYPE"], sleep_level_with_group, set(sleep_types_to_compute) & set(["main", "nap"]))])
features_fullname.extend(intraday_features_to_compute["ROUTINE"])
return features_fullname
@ -68,26 +68,28 @@ def statsFeatures(sleep_episodes, features, episode_type):
def allStatsFeatures(sleep_data, base_sleep_levels, base_sleep_types, features, sleep_intraday_features):
# For CLASSIC
for sleep_level, sleep_type in itertools.product(base_sleep_levels["CLASSIC"] + ["all"], base_sleep_types + ["all"]):
sleep_episodes_classic = sleep_data[sleep_data["is_main_sleep"] == (1 if sleep_type == "main" else 0)] if sleep_type != "all" else sleep_data
for sleep_level, sleep_type in itertools.product(base_sleep_levels["CLASSIC"] + ["all"], base_sleep_types):
sleep_episodes_classic = sleep_data[sleep_data["type"] == "classic"]
sleep_episodes_classic = sleep_episodes_classic[sleep_episodes_classic["is_main_sleep"] == (1 if sleep_type == "main" else 0)] if sleep_type != "all" else sleep_episodes_classic
sleep_episodes_classic = sleep_episodes_classic[sleep_episodes_classic["level"] == sleep_level] if sleep_level != "all" else sleep_episodes_classic
sleep_intraday_features = pd.concat([sleep_intraday_features, statsFeatures(sleep_episodes_classic, features, sleep_level + "classic" + sleep_type)], axis=1)
# For STAGES
for sleep_level, sleep_type in itertools.product(base_sleep_levels["STAGES"] + ["all"], base_sleep_types + ["all"]):
sleep_episodes_stages = sleep_data[sleep_data["is_main_sleep"] == (1 if sleep_type == "main" else 0)] if sleep_type != "all" else sleep_data
for sleep_level, sleep_type in itertools.product(base_sleep_levels["STAGES"] + ["all"], base_sleep_types):
sleep_episodes_stages = sleep_data[sleep_data["type"] == "stages"]
sleep_episodes_stages = sleep_episodes_stages[sleep_episodes_stages["is_main_sleep"] == (1 if sleep_type == "main" else 0)] if sleep_type != "all" else sleep_episodes_stages
sleep_episodes_stages = sleep_episodes_stages[sleep_episodes_stages["level"] == sleep_level] if sleep_level != "all" else sleep_episodes_stages
sleep_intraday_features = pd.concat([sleep_intraday_features, statsFeatures(sleep_episodes_stages, features, sleep_level + "stages" + sleep_type)], axis=1)
# For UNIFIED
for sleep_level, sleep_type in itertools.product(base_sleep_levels["UNIFIED"] + ["all"], base_sleep_types + ["all"]):
for sleep_level, sleep_type in itertools.product(base_sleep_levels["UNIFIED"] + ["all"], base_sleep_types):
sleep_episodes_unified = sleep_data[sleep_data["is_main_sleep"] == (1 if sleep_type == "main" else 0)] if sleep_type != "all" else sleep_data
sleep_episodes_unified = sleep_episodes_unified[sleep_episodes_unified["unified_level"] == (0 if sleep_level == "awake" else 1)] if sleep_level != "all" else sleep_episodes_unified
sleep_episodes_unified = mergeSleepEpisodes(sleep_episodes_unified, ["local_segment", "unified_level_episode_id"])
sleep_intraday_features = pd.concat([sleep_intraday_features, statsFeatures(sleep_episodes_unified, features, sleep_level + "unified" + sleep_type)], axis=1)
# Ignore the levels (e.g. countepisode[all][main])
for sleep_type in base_sleep_types + ["all"]:
for sleep_type in base_sleep_types:
sleep_episodes_none = sleep_data[sleep_data["is_main_sleep"] == (1 if sleep_type == "main" else 0)] if sleep_type != "all" else sleep_data
sleep_episodes_none = mergeSleepEpisodes(sleep_episodes_none, ["local_segment", "type_episode_id"])
sleep_intraday_features = pd.concat([sleep_intraday_features, statsFeatures(sleep_episodes_none, features, "all" + sleep_type)], axis=1)
@ -151,6 +153,11 @@ def ratiosFeatures(sleep_intraday_features, ratios_types, ratios_scopes, sleep_l
# 7) ratios_type: "duration", sleep_levels_combined: ("unified", "asleep"), sleep_type: "main"
# 8) ratios_type: "duration", sleep_levels_combined: ("unified", "asleep"), sleep_type: "nap"
for ratios_type, sleep_levels_combined, sleep_type in itertools.product(ratios_types, sleep_level_with_group, sleep_types):
# The "all" sleep type is not considered for any ratios features since it will be 1 all the time
if sleep_type == "all":
continue
sleep_level_group, sleep_level = sleep_levels_combined[0], sleep_levels_combined[1]
agg_func = "countepisode" if ratios_type == "count" else "sumduration"
@ -167,36 +174,36 @@ def ratiosFeatures(sleep_intraday_features, ratios_types, ratios_scopes, sleep_l
return sleep_intraday_features
def singleSleepTypeRoutineFeatures(sleep_intraday_data, routine, reference_time, sleep_type, sleep_intraday_features):
def singleSleepTypeRoutineFeatures(sleep_intraday_data, routine, routine_reference_time, sleep_type, sleep_intraday_features):
sleep_intraday_data = sleep_intraday_data[sleep_intraday_data["is_main_sleep"] == (1 if sleep_type == "mainsleep" else 0)]
if "starttimefirst" + sleep_type in routine:
grouped_first = sleep_intraday_data.groupby(["local_segment"]).first()
if reference_time == "MIDNIGHT":
if routine_reference_time == "MIDNIGHT":
sleep_intraday_features["starttimefirst" + sleep_type] = grouped_first["local_start_date_time"].apply(lambda x: x.hour * 60 + x.minute + x.second / 60)
elif reference_time == "START_OF_THE_SEGMENT":
elif routine_reference_time == "START_OF_THE_SEGMENT":
sleep_intraday_features["starttimefirst" + sleep_type] = (grouped_first["start_timestamp"] - grouped_first["segment_start_timestamp"]) / (60 * 1000)
else:
raise ValueError("Please check FITBIT_SLEEP_INTRADAY section of config.yaml: REFERENCE_TIME can only be MIDNIGHT or START_OF_THE_SEGMENT.")
raise ValueError("Please check FITBIT_SLEEP_INTRADAY section of config.yaml: ROUTINE_REFERENCE_TIME can only be MIDNIGHT or START_OF_THE_SEGMENT.")
if "endtimelast" + sleep_type in routine:
grouped_last = sleep_intraday_data.groupby(["local_segment"]).last()
if reference_time == "MIDNIGHT":
if routine_reference_time == "MIDNIGHT":
sleep_intraday_features["endtimelast" + sleep_type] = grouped_last["local_end_date_time"].apply(lambda x: x.hour * 60 + x.minute + x.second / 60)
elif reference_time == "START_OF_THE_SEGMENT":
elif routine_reference_time == "START_OF_THE_SEGMENT":
sleep_intraday_features["endtimelast" + sleep_type] = (grouped_last["end_timestamp"] - grouped_last["segment_start_timestamp"]) / (60 * 1000)
else:
raise ValueError("Please check FITBIT_SLEEP_INTRADAY section of config.yaml: REFERENCE_TIME can only be MIDNIGHT or START_OF_THE_SEGMENT.")
raise ValueError("Please check FITBIT_SLEEP_INTRADAY section of config.yaml: ROUTINE_REFERENCE_TIME can only be MIDNIGHT or START_OF_THE_SEGMENT.")
return sleep_intraday_features
def routineFeatures(sleep_intraday_data, routine, reference_time, sleep_type, sleep_intraday_features):
def routineFeatures(sleep_intraday_data, routine, routine_reference_time, sleep_type, sleep_intraday_features):
if "starttimefirstmainsleep" in routine or "endtimelastmainsleep" in routine:
sleep_intraday_features = singleSleepTypeRoutineFeatures(sleep_intraday_data, routine, reference_time, "mainsleep", sleep_intraday_features)
sleep_intraday_features = singleSleepTypeRoutineFeatures(sleep_intraday_data, routine, routine_reference_time, "mainsleep", sleep_intraday_features)
if "starttimefirstnap" in routine or "endtimelastnap" in routine:
sleep_intraday_features = singleSleepTypeRoutineFeatures(sleep_intraday_data, routine, reference_time, "nap", sleep_intraday_features)
sleep_intraday_features = singleSleepTypeRoutineFeatures(sleep_intraday_data, routine, routine_reference_time, "nap", sleep_intraday_features)
return sleep_intraday_features
@ -205,11 +212,11 @@ def rapids_features(sensor_data_files, time_segment, provider, filter_data_by_se
sleep_intraday_data = pd.read_csv(sensor_data_files["sensor_data"])
consider_all = provider["FEATURES"]["LEVELS_AND_TYPES_COMBINING_ALL"]
include_sleep_later_than = provider["INCLUDE_SLEEP_LATER_THAN"]
reference_time = provider["REFERENCE_TIME"]
last_night_end = provider["LAST_NIGHT_END"]
routine_reference_time = provider["ROUTINE_REFERENCE_TIME"]
requested_intraday_features = provider["FEATURES"]
levels_include_all_groups = provider["SLEEP_LEVELS"]["INCLUDE_ALL_GROUPS"]
requested_sleep_levels = provider["SLEEP_LEVELS"]
requested_sleep_types = provider["SLEEP_TYPES"]
@ -221,7 +228,7 @@ def rapids_features(sensor_data_files, time_segment, provider, filter_data_by_se
base_sleep_levels = {"CLASSIC": ["awake", "restless", "asleep"],
"STAGES": ["wake", "deep", "light", "rem"],
"UNIFIED": ["awake", "asleep"]}
base_sleep_types = ["main", "nap"]
base_sleep_types = ["main", "nap", "all"]
# The subset of requested features this function can compute
intraday_features_to_compute = {key: list(set(requested_intraday_features[key]) & set(base_intraday_features[key])) for key in requested_intraday_features if key in base_intraday_features}
@ -229,13 +236,13 @@ def rapids_features(sensor_data_files, time_segment, provider, filter_data_by_se
sleep_types_to_compute = list(set(requested_sleep_types) & set(base_sleep_types))
# Full names
features_fullnames = featuresFullNames(intraday_features_to_compute, sleep_levels_to_compute, sleep_types_to_compute, consider_all)
features_fullnames = featuresFullNames(intraday_features_to_compute, sleep_levels_to_compute, sleep_types_to_compute, levels_include_all_groups)
sleep_intraday_features = pd.DataFrame(columns=features_fullnames)
# Include sleep later than
# Any 1-minute sleep chunk with a local time before LAST_NIGHT_END will be discarded.
start_minutes = sleep_intraday_data.groupby("start_timestamp").first()["local_time"].apply(lambda x: int(x.split(":")[0]) * 60 + int(x.split(":")[1]) + int(x.split(":")[2]) / 60).to_frame().rename(columns={"local_time": "start_minutes"}).reset_index()
sleep_intraday_data = sleep_intraday_data.merge(start_minutes, on="start_timestamp", how="left")
sleep_intraday_data = sleep_intraday_data[sleep_intraday_data["start_minutes"] >= include_sleep_later_than]
sleep_intraday_data = sleep_intraday_data[sleep_intraday_data["start_minutes"] >= last_night_end]
del sleep_intraday_data["start_minutes"]
sleep_intraday_data = filter_data_by_segment(sleep_intraday_data, time_segment)
@ -254,7 +261,7 @@ def rapids_features(sensor_data_files, time_segment, provider, filter_data_by_se
sleep_intraday_features = ratiosFeatures(sleep_intraday_features, intraday_features_to_compute["RATIOS_TYPE"], intraday_features_to_compute["RATIOS_SCOPE"], sleep_levels_to_compute, sleep_types_to_compute)
# ROUTINE: only compute requested features
sleep_intraday_features = routineFeatures(sleep_intraday_data, intraday_features_to_compute["ROUTINE"], reference_time, sleep_types_to_compute, sleep_intraday_features)
sleep_intraday_features = routineFeatures(sleep_intraday_data, intraday_features_to_compute["ROUTINE"], routine_reference_time, sleep_types_to_compute, sleep_intraday_features)
# Reset index and discard features which are not requested by user
sleep_intraday_features.index.name = "local_segment"

View File

@ -44,6 +44,21 @@ def extractSleepFeaturesFromSummaryData(sleep_summary_data, summary_features, sl
if "countepisode" in summary_features:
sleep_summary_features = sleep_summary_features.join(features_count[["timestamp"]], how="outer").rename(columns={"timestamp": "countepisode" + sleep_type})
features_first = sleep_summary_data[["local_segment", "minutes_start_episode", "minutes_end_episode"]].groupby(["local_segment"]).first()
if "firstwaketime" in summary_features:
sleep_summary_features = sleep_summary_features.join(features_first[["minutes_end_episode"]].shift(), how="outer").rename(columns={"minutes_end_episode": "firstwaketime" + sleep_type})
if "firstbedtime" in summary_features:
sleep_summary_features = sleep_summary_features.join(features_first[["minutes_start_episode"]], how="outer").rename(columns={"minutes_start_episode": "firstbedtime" + sleep_type})
features_last = sleep_summary_data[["local_segment", "minutes_start_episode", "minutes_end_episode"]].groupby(["local_segment"]).last()
if "lastwaketime" in summary_features:
sleep_summary_features = sleep_summary_features.join(features_last[["minutes_end_episode"]].shift(), how="outer").rename(columns={"minutes_end_episode": "lastwaketime" + sleep_type})
if "lastbedtime" in summary_features:
sleep_summary_features = sleep_summary_features.join(features_last[["minutes_start_episode"]], how="outer").rename(columns={"minutes_start_episode": "lastbedtime" + sleep_type})
return sleep_summary_features
@ -55,7 +70,7 @@ def rapids_features(sensor_data_files, time_segment, provider, filter_data_by_se
requested_sleep_types = provider["SLEEP_TYPES"]
# name of the features this function can compute
base_summary_features = ["countepisode", "avgefficiency", "sumdurationafterwakeup", "sumdurationasleep", "sumdurationawake", "sumdurationtofallasleep", "sumdurationinbed", "avgdurationafterwakeup", "avgdurationasleep", "avgdurationawake", "avgdurationtofallasleep", "avgdurationinbed"]
base_summary_features = ["firstwaketime", "lastwaketime", "firstbedtime", "lastbedtime", "countepisode", "avgefficiency", "sumdurationafterwakeup", "sumdurationasleep", "sumdurationawake", "sumdurationtofallasleep", "sumdurationinbed", "avgdurationafterwakeup", "avgdurationasleep", "avgdurationawake", "avgdurationtofallasleep", "avgdurationinbed"]
base_sleep_types = ["main", "nap", "all"]
# the subset of requested features this function can compute
summary_features_to_compute = list(set(requested_summary_features) & set(base_summary_features))
@ -63,13 +78,15 @@ def rapids_features(sensor_data_files, time_segment, provider, filter_data_by_se
# full names
features_fullnames_to_compute = ["".join(feature) for feature in itertools.product(summary_features_to_compute, sleep_types_to_compute)]
colnames_can_be_zero = [col for col in features_fullnames_to_compute if "avgefficiency" not in col]
colnames_can_be_zero = ["".join(feature) for feature in itertools.product(set(summary_features_to_compute) - set(["firstwaketime", "lastwaketime", "firstbedtime", "lastbedtime", "avgefficiency"]), sleep_types_to_compute)]
# extract features from summary data
sleep_summary_features = pd.DataFrame(columns=["local_segment"] + features_fullnames_to_compute)
if not sleep_summary_data.empty:
sleep_summary_data = filter_data_by_segment(sleep_summary_data, time_segment)
notna_segments = sleep_summary_data[sleep_summary_data["type"].notna()]["local_segment"].unique()
if not sleep_summary_data.empty:
# only keep the segments that start at 00:00:00 and end at 23:59:59
datetime_start_regex = "[0-9]{4}[\\-|\\/][0-9]{2}[\\-|\\/][0-9]{2} 00:00:00"
@ -78,13 +95,19 @@ def rapids_features(sensor_data_files, time_segment, provider, filter_data_by_se
segment_regex = "{}#{},{}".format(time_segment, datetime_start_regex, datetime_end_regex)
sleep_summary_data = sleep_summary_data[sleep_summary_data["local_segment"].str.match(segment_regex)]
# calculate number of minutes after segment's start date time
dt_cols = ["local_start_date_time", "local_end_date_time", "local_date_time"]
sleep_summary_data[dt_cols] = sleep_summary_data[dt_cols].apply(pd.to_datetime)
sleep_summary_data["minutes_start_episode"] = (sleep_summary_data["local_start_date_time"] - sleep_summary_data["local_date_time"]) / pd.Timedelta(minutes=1)
sleep_summary_data["minutes_end_episode"] = (sleep_summary_data["local_end_date_time"] - (sleep_summary_data["local_date_time"] + pd.Timedelta(days=1))) / pd.Timedelta(minutes=1)
if not sleep_summary_data.empty:
sleep_summary_features = pd.DataFrame()
for sleep_type in sleep_types_to_compute:
sleep_summary_features = extractSleepFeaturesFromSummaryData(sleep_summary_data, summary_features_to_compute, sleep_type, sleep_summary_features)
sleep_summary_features[colnames_can_be_zero] = sleep_summary_features[colnames_can_be_zero].fillna(0)
sleep_summary_features.loc[notna_segments, colnames_can_be_zero] = sleep_summary_features.loc[notna_segments, colnames_can_be_zero].fillna(0)
sleep_summary_features = sleep_summary_features.reset_index()

View File

@ -827,22 +827,44 @@ properties:
type: string
fitbitjson_mysql:
type: object
required: [DATABASE_GROUP, SLEEP_SUMMARY_EPISODE_DAY_ANCHOR]
required: [DATABASE_GROUP, SLEEP_SUMMARY_LAST_NIGHT_END]
properties:
DATABASE_GROUP:
type: string
SLEEP_SUMMARY_EPISODE_DAY_ANCHOR:
type: string
enum: ["start", "end"]
SLEEP_SUMMARY_LAST_NIGHT_END:
type: number
minimum: 0
maximum: 1439
fitbitparsed_mysql:
type: object
required: [DATABASE_GROUP, SLEEP_SUMMARY_EPISODE_DAY_ANCHOR]
required: [DATABASE_GROUP, SLEEP_SUMMARY_LAST_NIGHT_END]
properties:
DATABASE_GROUP:
type: string
SLEEP_SUMMARY_EPISODE_DAY_ANCHOR:
SLEEP_SUMMARY_LAST_NIGHT_END:
type: number
minimum: 0
maximum: 1439
fitbitjson_csv:
type: object
required: [FOLDER, SLEEP_SUMMARY_LAST_NIGHT_END]
properties:
FOLDER:
type: string
enum: ["start", "end"]
SLEEP_SUMMARY_LAST_NIGHT_END:
type: number
minimum: 0
maximum: 1439
fitbitparsed_csv:
type: object
required: [FOLDER, SLEEP_SUMMARY_LAST_NIGHT_END]
properties:
FOLDER:
type: string
SLEEP_SUMMARY_LAST_NIGHT_END:
type: number
minimum: 0
maximum: 1439
FITBIT_DATA_YIELD:
type: object
@ -926,7 +948,7 @@ properties:
uniqueItems: True
items:
type: string
enum: ["countepisode", "avgefficiency", "sumdurationafterwakeup", "sumdurationasleep", "sumdurationawake", "sumdurationtofallasleep", "sumdurationinbed", "avgdurationafterwakeup", "avgdurationasleep", "avgdurationawake", "avgdurationtofallasleep", "avgdurationinbed"]
enum: ["firstwaketime", "lastwaketime", "firstbedtime", "lastbedtime", "countepisode", "avgefficiency", "sumdurationafterwakeup", "sumdurationasleep", "sumdurationawake", "sumdurationtofallasleep", "sumdurationinbed", "avgdurationafterwakeup", "avgdurationasleep", "avgdurationawake", "avgdurationtofallasleep", "avgdurationinbed"]
SLEEP_TYPES:
type: array
uniqueItems: True
@ -951,10 +973,8 @@ properties:
- properties:
FEATURES:
type: object
required: [LEVELS_AND_TYPES_COMBINING_ALL, LEVELS_AND_TYPES, RATIOS_TYPE, RATIOS_SCOPE, ROUTINE]
required: [LEVELS_AND_TYPES, RATIOS_TYPE, RATIOS_SCOPE, ROUTINE]
properties:
LEVELS_AND_TYPES_COMBINING_ALL:
type: boolean
LEVELS_AND_TYPES:
type: array
uniqueItems: True
@ -981,8 +1001,10 @@ properties:
enum: [starttimefirstmainsleep, endtimelastmainsleep, starttimefirstnap, endtimelastnap]
SLEEP_LEVELS:
type: object
required: [CLASSIC, STAGES, UNIFIED]
required: [INCLUDE_ALL_GROUPS, CLASSIC, STAGES, UNIFIED]
properties:
INCLUDE_ALL_GROUPS:
type: boolean
CLASSIC:
type: array
uniqueItems: True
@ -1006,12 +1028,12 @@ properties:
uniqueItems: True
items:
type: string
enum: [main, nap]
INCLUDE_SLEEP_LATER_THAN:
enum: [main, nap, all]
LAST_NIGHT_END:
type: number
minimum: 0
maximum: 1439
REFERENCE_TIME:
ROUTINE_REFERENCE_TIME:
type: string
enum: [MIDNIGHT, START_OF_THE_SEGMENT]
PRICE:
@ -1022,11 +1044,13 @@ properties:
uniqueItems: True
items:
type: string
enum: [avgduration, avgratioduration, avgstarttimeofepisodemain, avgendtimeofepisodemain, avgmidpointofepisodemain, "stdstarttimeofepisodemain", "stdendtimeofepisodemain", "stdmidpointofepisodemain", socialjetlag, meanssdstarttimeofepisodemain, meanssdendtimeofepisodemain, meanssdmidpointofepisodemain, medianssdstarttimeofepisodemain, medianssdendtimeofepisodemain, medianssdmidpointofepisodemain]
enum: [avgduration, avgratioduration, avgstarttimeofepisodemain, avgendtimeofepisodemain, avgmidpointofepisodemain, stdstarttimeofepisodemain, stdendtimeofepisodemain, stdmidpointofepisodemain, socialjetlag, rmssdmeanstarttimeofepisodemain, rmssdmeanendtimeofepisodemain, rmssdmeanmidpointofepisodemain, rmssdmedianstarttimeofepisodemain, rmssdmedianendtimeofepisodemain, rmssdmedianmidpointofepisodemain]
SLEEP_LEVELS:
type: object
required: [CLASSIC, STAGES, UNIFIED]
required: [INCLUDE_ALL_GROUPS, CLASSIC, STAGES, UNIFIED]
properties:
INCLUDE_ALL_GROUPS:
type: boolean
CLASSIC:
type: array
uniqueItems: True
@ -1051,18 +1075,10 @@ properties:
items:
type: string
enum: [WEEKEND, WEEK, ALL]
GROUP_EPISODES_WITHIN:
type: object
required: [START_TIME, LENGTH]
properties:
START_TIME:
LAST_NIGHT_END:
type: number
minimum: 0
maximum: 1439
LENGTH:
type: number
minimum: 0
maximum: 1440
additionalProperties:
$ref: "#/definitions/PROVIDER"