Update config & docs of sleep features for example workflow

pull/134/head
Meng Li 2021-04-27 11:44:15 -04:00
parent 809845143f
commit c900712960
2 changed files with 10 additions and 15 deletions

View File

@ -69,7 +69,7 @@ Note you will see a lot of warning messages, you can ignore them since they happ
??? info "6. Feature cleaning."
In this stage we perform four steps to clean our sensor feature file. First, we discard days with a data yield hour ratio less than or equal to 0.75, i.e. we include days with at least 18 hours of data. Second, we drop columns (features) with more than 30% of missing rows. Third, we drop columns with zero variance. Fourth, we drop rows (days) with more than 30% of missing columns (features). In this cleaning stage several parameters are created and exposed in `example_profile/example_config.yaml`.
After this step, we kept 161 features over 11 days for the individual model of p01, 101 features over 12 days for the individual model of p02 and 107 features over 20 days for the population model. Note that the difference in the number of features between p01 and p02 is mostly due to iOS restrictions that stops researchers from collecting the same number of sensors than in Android phones.
After this step, we kept 158 features over 11 days for the individual model of p01, 101 features over 12 days for the individual model of p02 and 106 features over 20 days for the population model. Note that the difference in the number of features between p01 and p02 is mostly due to iOS restrictions that stops researchers from collecting the same number of sensors than in Android phones.
Feature cleaning for the individual models is done in the `clean_sensor_features_for_individual_participants` rule and for the population model in the `clean_sensor_features_for_all_participants` rule in `rules/models.smk`.

View File

@ -308,19 +308,19 @@ FITBIT_DATA_STREAMS:
# AVAILABLE:
fitbitjson_mysql:
DATABASE_GROUP: MY_GROUP
SLEEP_SUMMARY_EPISODE_DAY_ANCHOR: end # summary sleep episodes are considered as events based on either the start timestamp or end timestamp.
SLEEP_SUMMARY_LAST_NIGHT_END: 660 # a number ranged from 0 (midnight) to 1439 (23:59) which denotes number of minutes after midnight. By default, 660 (11:00).
fitbitparsed_mysql:
DATABASE_GROUP: MY_GROUP
SLEEP_SUMMARY_EPISODE_DAY_ANCHOR: end # summary sleep episodes are considered as events based on either the start timestamp or end timestamp.
SLEEP_SUMMARY_LAST_NIGHT_END: 660 # a number ranged from 0 (midnight) to 1439 (23:59) which denotes number of minutes after midnight. By default, 660 (11:00).
fitbitjson_csv:
FOLDER: data/external/example_workflow
SLEEP_SUMMARY_EPISODE_DAY_ANCHOR: end # summary sleep episodes are considered as events based on either the start timestamp or end timestamp.
SLEEP_SUMMARY_LAST_NIGHT_END: 660 # a number ranged from 0 (midnight) to 1439 (23:59) which denotes number of minutes after midnight. By default, 660 (11:00).
fitbitparsed_csv:
FOLDER: data/external/fitbit_csv
SLEEP_SUMMARY_EPISODE_DAY_ANCHOR: end # summary sleep episodes are considered as events based on either the start timestamp or end timestamp.
SLEEP_SUMMARY_LAST_NIGHT_END: 660 # a number ranged from 0 (midnight) to 1439 (23:59) which denotes number of minutes after midnight. By default, 660 (11:00).
# Sensors ------
@ -356,7 +356,6 @@ FITBIT_HEARTRATE_INTRADAY:
# See https://www.rapids.science/latest/features/fitbit-sleep-summary/
FITBIT_SLEEP_SUMMARY:
CONTAINER: fitbit_data.csv
SLEEP_EPISODE_TIMESTAMP: end # summary sleep episodes are considered as events based on either the start timestamp or end timestamp.
PROVIDERS:
RAPIDS:
COMPUTE: True
@ -371,31 +370,27 @@ FITBIT_SLEEP_INTRADAY:
RAPIDS:
COMPUTE: False
FEATURES:
LEVELS_AND_TYPES_COMBINING_ALL: True
LEVELS_AND_TYPES: [countepisode, sumduration, maxduration, minduration, avgduration, medianduration, stdduration]
RATIOS_TYPE: [count, duration]
RATIOS_SCOPE: [ACROSS_LEVELS, ACROSS_TYPES, WITHIN_LEVELS, WITHIN_TYPES]
ROUTINE: [starttimefirstmainsleep, endtimelastmainsleep, starttimefirstnap, endtimelastnap]
SLEEP_LEVELS:
INCLUDE_ALL_GROUPS: True
CLASSIC: [awake, restless, asleep]
STAGES: [wake, deep, light, rem]
UNIFIED: [awake, asleep]
SLEEP_TYPES: [main, nap]
INCLUDE_SLEEP_LATER_THAN: 0 # a number ranged from 0 (midnight) to 1439 (23:59)
REFERENCE_TIME: MIDNIGHT # chosen from "MIDNIGHT" and "START_OF_THE_SEGMENT"
SLEEP_TYPES: [main, nap, all]
SRC_SCRIPT: src/features/fitbit_sleep_intraday/rapids/main.py
PRICE:
COMPUTE: False
FEATURES: [avgduration, avgratioduration, avgstarttimeofepisodemain, avgendtimeofepisodemain, avgmidpointofepisodemain, "stdstarttimeofepisodemain", "stdendtimeofepisodemain", "stdmidpointofepisodemain", socialjetlag, meanssdstarttimeofepisodemain, meanssdendtimeofepisodemain, meanssdmidpointofepisodemain, medianssdstarttimeofepisodemain, medianssdendtimeofepisodemain, medianssdmidpointofepisodemain]
FEATURES: [avgduration, avgratioduration, avgstarttimeofepisodemain, avgendtimeofepisodemain, avgmidpointofepisodemain, stdstarttimeofepisodemain, stdendtimeofepisodemain, stdmidpointofepisodemain, socialjetlag, rmssdmeanstarttimeofepisodemain, rmssdmeanendtimeofepisodemain, rmssdmeanmidpointofepisodemain, rmssdmedianstarttimeofepisodemain, rmssdmedianendtimeofepisodemain, rmssdmedianmidpointofepisodemain]
SLEEP_LEVELS:
INCLUDE_ALL_GROUPS: True
CLASSIC: [awake, restless, asleep]
STAGES: [wake, deep, light, rem]
UNIFIED: [awake, asleep]
DAY_TYPES: [WEEKEND, WEEK, ALL]
GROUP_EPISODES_WITHIN: # by default: today's 6pm to tomorrow's noon
START_TIME: 1080 # number of minutes after the midnight (18:00) 18*60
LENGTH: 1080 # in minutes (18 hours) 18*60
LAST_NIGHT_END: 660 # number of minutes after midnight (11:00) 11*60
SRC_SCRIPT: src/features/fitbit_sleep_intraday/price/main.py
# See https://www.rapids.science/latest/features/fitbit-steps-summary/