Add calories intraday features
parent
20910bf1dc
commit
9c56422529
|
@ -217,6 +217,15 @@ for provider in config["PHONE_LOCATIONS"]["PROVIDERS"].keys():
|
||||||
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
|
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
|
||||||
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
|
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
|
||||||
|
|
||||||
|
# Fitbit calories intraday: for every provider flagged COMPUTE, request the raw
# data, the per-provider feature file, the per-participant sensor file and the
# merged feature files.
for provider, provider_settings in config["FITBIT_CALORIES_INTRADAY"]["PROVIDERS"].items():
    if not provider_settings["COMPUTE"]:
        continue
    participants = config["PIDS"]
    files_to_compute.extend(expand("data/raw/{pid}/fitbit_calories_intraday_raw.csv", pid=participants))
    files_to_compute.extend(expand("data/raw/{pid}/fitbit_calories_intraday_with_datetime.csv", pid=participants))
    # The interim file name encodes the script language and the lower-cased provider key
    files_to_compute.extend(
        expand(
            "data/interim/{pid}/fitbit_calories_intraday_features/fitbit_calories_intraday_{language}_{provider_key}.csv",
            pid=participants,
            language=get_script_language(provider_settings["SRC_SCRIPT"]),
            provider_key=provider.lower(),
        )
    )
    files_to_compute.extend(expand("data/processed/features/{pid}/fitbit_calories_intraday.csv", pid=participants))
    files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=participants))
    files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
|
||||||
|
|
||||||
for provider in config["FITBIT_DATA_YIELD"]["PROVIDERS"].keys():
|
for provider in config["FITBIT_DATA_YIELD"]["PROVIDERS"].keys():
|
||||||
if config["FITBIT_DATA_YIELD"]["PROVIDERS"][provider]["COMPUTE"]:
|
if config["FITBIT_DATA_YIELD"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||||
files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_intraday_raw.csv", pid=config["PIDS"]))
|
files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_intraday_raw.csv", pid=config["PIDS"]))
|
||||||
|
|
13
config.yaml
13
config.yaml
|
@ -337,6 +337,19 @@ FITBIT_DATA_STREAMS:
|
||||||
|
|
||||||
# Sensors ------
|
# Sensors ------
|
||||||
|
|
||||||
|
FITBIT_CALORIES_INTRADAY:
|
||||||
|
CONTAINER: fitbit_data
|
||||||
|
PROVIDERS:
|
||||||
|
RAPIDS:
|
||||||
|
COMPUTE: False
|
||||||
|
EPISODE_TYPE: [sedentary, lightlyactive, fairlyactive, veryactive, mvpa, lowmet, highmet]
|
||||||
|
EPISODE_TIME_THRESHOLD: 5 # minutes
|
||||||
|
EPISODE_MET_THRESHOLD: 3
|
||||||
|
EPISODE_MVPA_CATEGORIES: [fairlyactive, veryactive]
|
||||||
|
EPISODE_REFERENCE_TIME: MIDNIGHT # or START_OF_THE_SEGMENT
|
||||||
|
FEATURES: [count, sumduration, avgduration, minduration, maxduration, stdduration, starttimefirst, endtimefirst, starttimelast, endtimelast, starttimelongest, endtimelongest, summet, avgmet, maxmet, minmet, stdmet, sumcalories, avgcalories, maxcalories, mincalories, stdcalories]
|
||||||
|
SRC_SCRIPT: src/features/fitbit_calories_intraday/rapids/main.R
|
||||||
|
|
||||||
# See https://www.rapids.science/latest/features/fitbit-data-yield/
|
# See https://www.rapids.science/latest/features/fitbit-data-yield/
|
||||||
FITBIT_DATA_YIELD:
|
FITBIT_DATA_YIELD:
|
||||||
SENSOR: FITBIT_HEARTRATE_INTRADAY
|
SENSOR: FITBIT_HEARTRATE_INTRADAY
|
||||||
|
|
|
@ -0,0 +1,68 @@
|
||||||
|
# Fitbit Calories Intraday
|
||||||
|
|
||||||
|
Sensor parameters description for `[FITBIT_CALORIES_INTRADAY]`:
|
||||||
|
|
||||||
|
|Key | Description |
|
||||||
|
|----------------|-----------------------------------------------------------------------------------------------------------------------------------
|
||||||
|
|`[CONTAINER]`| Container where your calories intraday data is stored, depending on the data stream you are using this can be a database table, a CSV file, etc. |
|
||||||
|
|
||||||
|
|
||||||
|
## RAPIDS provider
|
||||||
|
|
||||||
|
!!! info "Available time segments"
|
||||||
|
- Available for all time segments
|
||||||
|
|
||||||
|
!!! info "File Sequence"
|
||||||
|
```bash
|
||||||
|
- data/raw/{pid}/fitbit_calories_intraday_raw.csv
|
||||||
|
- data/raw/{pid}/fitbit_calories_intraday_with_datetime.csv
|
||||||
|
- data/interim/{pid}/fitbit_calories_intraday_features/fitbit_calories_intraday_{language}_{provider_key}.csv
|
||||||
|
- data/processed/features/{pid}/fitbit_calories_intraday.csv
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
Parameters description for `[FITBIT_CALORIES_INTRADAY][PROVIDERS][RAPIDS]`:
|
||||||
|
|
||||||
|
|Key | Description |
|
||||||
|
|----------------|-----------------------------------------------------------------------------------------------------------------------------------
|
||||||
|
|`[COMPUTE]` | Set to `True` to extract `FITBIT_CALORIES_INTRADAY` features from the `RAPIDS` provider|
|
||||||
|
|`[FEATURES]` | Features to be computed from calories intraday data, see table below |
|
||||||
|
|`[EPISODE_TYPE]` | RAPIDS will compute features for any episodes in this list. There are seven types of episodes defined as consecutive appearances of a label. Four are based on the activity level labels provided by Fitbit: `sedentary`, `lightly active`, `fairly active`, and `very active`. One is defined by RAPIDS as moderate to vigorous physical activity `MVPA` episodes that are based on all `fairly active`, and `very active` labels. Two are defined by the user based on a threshold that divides low or high MET (metabolic equivalent) episodes. |
|
||||||
|
|`[EPISODE_TIME_THRESHOLD]` | Any consecutive rows of the same `[EPISODE_TYPE]` will be considered a single episode if the time difference between them is less than or equal to this threshold in minutes|
|
||||||
|
|`[EPISODE_MET_THRESHOLD]` | Any 1-minute calorie data chunk with a MET value equal to or higher than this threshold will be considered a high MET episode and low MET otherwise. The default value is 3|
|
||||||
|
|`[EPISODE_MVPA_CATEGORIES]` | The Fitbit level labels that are considered part of a moderate to vigorous physical activity episode. One or more of `sedentary`, `lightly active`, `fairly active`, and `very active`. The defaults are `fairly active` and `very active`|
|
||||||
|
|`[EPISODE_REFERENCE_TIME]` | Reference time for the start/end time features. `MIDNIGHT` sets the reference time to 00:00 of each day, `START_OF_THE_SEGMENT` sets the reference time to the start of the time segment (useful when a segment is shorter than a day or spans multiple days)|
|
||||||
|
|
||||||
|
|
||||||
|
Features description for `[FITBIT_CALORIES_INTRADAY][PROVIDERS][RAPIDS]`:
|
||||||
|
|
||||||
|
|Feature |Units |Description|
|
||||||
|
|-------------------------- |---------- |---------------------------|
|
||||||
|
|starttimefirstepisode`EPISODE_TYPE` |minutes |Start time of the first episode of type `[EPISODE_TYPE]`
|
||||||
|
|endtimefirstepisode`EPISODE_TYPE` |minutes |End time of the first episode of type `[EPISODE_TYPE]`
|
||||||
|
|starttimelastepisode`EPISODE_TYPE` |minutes |Start time of the last episode of type `[EPISODE_TYPE]`
|
||||||
|
|endtimelastepisode`EPISODE_TYPE` |minutes |End time of the last episode of type `[EPISODE_TYPE]`
|
||||||
|
|starttimelongestepisode`EPISODE_TYPE` |minutes |Start time of the longest episode of type `[EPISODE_TYPE]`
|
||||||
|
|endtimelongestepisode`EPISODE_TYPE` |minutes |End time of the longest episode of type `[EPISODE_TYPE]`
|
||||||
|
|countepisode`EPISODE_TYPE` |episodes |The number of episodes of type `[EPISODE_TYPE]`
|
||||||
|
|sumdurationepisode`EPISODE_TYPE` |minutes |The sum of the duration of episodes of type `[EPISODE_TYPE]`
|
||||||
|
|avgdurationepisode`EPISODE_TYPE` |minutes |The average of the duration of episodes of type `[EPISODE_TYPE]`
|
||||||
|
|maxdurationepisode`EPISODE_TYPE` |minutes |The maximum of the duration of episodes of type `[EPISODE_TYPE]`
|
||||||
|
|mindurationepisode`EPISODE_TYPE` |minutes |The minimum of the duration of episodes of type `[EPISODE_TYPE]`
|
||||||
|
|stddurationepisode`EPISODE_TYPE` |minutes |The standard deviation of the duration of episodes of type `[EPISODE_TYPE]`
|
||||||
|
|summet`EPISODE_TYPE` |METs |The sum of all METs during episodes of type `[EPISODE_TYPE]`
|
||||||
|
|avgmet`EPISODE_TYPE` |METs |The average of all METs during episodes of type `[EPISODE_TYPE]`
|
||||||
|
|maxmet`EPISODE_TYPE` |METs |The maximum of all METs during episodes of type `[EPISODE_TYPE]`
|
||||||
|
|minmet`EPISODE_TYPE` |METs |The minimum of all METs during episodes of type `[EPISODE_TYPE]`
|
||||||
|
|stdmet`EPISODE_TYPE` |METs |The standard deviation of all METs during episodes of type `[EPISODE_TYPE]`
|
||||||
|
|sumcalories`EPISODE_TYPE` |calories |The sum of all calories during episodes of type `[EPISODE_TYPE]`
|
||||||
|
|avgcalories`EPISODE_TYPE` |calories |The average of all calories during episodes of type `[EPISODE_TYPE]`
|
||||||
|
|maxcalories`EPISODE_TYPE` |calories |The maximum of all calories during episodes of type `[EPISODE_TYPE]`
|
||||||
|
|mincalories`EPISODE_TYPE` |calories |The minimum of all calories during episodes of type `[EPISODE_TYPE]`
|
||||||
|
|stdcalories`EPISODE_TYPE` |calories |The standard deviation of all calories during episodes of type `[EPISODE_TYPE]`
|
||||||
|
|
||||||
|
|
||||||
|
!!! note "Assumptions/Observations"
|
||||||
|
- These features are based on intraday calories data that is usually obtained in 1-minute chunks from Fitbit's API.
|
||||||
|
- The MET value returned by Fitbit is divided by 10
|
||||||
|
- Take into account that the [intraday data returned by Fitbit](https://dev.fitbit.com/build/reference/web-api/activity/#get-activity-intraday-time-series) can contain time series for calories burned inclusive of BMR, tracked activity, and manually logged activities.
|
|
@ -119,6 +119,7 @@ nav:
|
||||||
- Phone WiFI Connected: features/phone-wifi-connected.md
|
- Phone WiFI Connected: features/phone-wifi-connected.md
|
||||||
- Phone WiFI Visible: features/phone-wifi-visible.md
|
- Phone WiFI Visible: features/phone-wifi-visible.md
|
||||||
- Fitbit:
|
- Fitbit:
|
||||||
|
- Fitbit Calories Intraday: features/fitbit-calories-intraday.md
|
||||||
- Fitbit Data Yield: features/fitbit-data-yield.md
|
- Fitbit Data Yield: features/fitbit-data-yield.md
|
||||||
- Fitbit Heart Rate Summary: features/fitbit-heartrate-summary.md
|
- Fitbit Heart Rate Summary: features/fitbit-heartrate-summary.md
|
||||||
- Fitbit Heart Rate Intraday: features/fitbit-heartrate-intraday.md
|
- Fitbit Heart Rate Intraday: features/fitbit-heartrate-intraday.md
|
||||||
|
|
|
@ -516,6 +516,32 @@ rule phone_wifi_visible_r_features:
|
||||||
script:
|
script:
|
||||||
"../src/features/entry.R"
|
"../src/features/entry.R"
|
||||||
|
|
||||||
|
# Computes Fitbit calories intraday features with a provider implemented in Python.
rule fitbit_calories_intraday_python_features:
    input:
        sensor_data = "data/raw/{pid}/fitbit_calories_intraday_with_datetime.csv",
        time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"
    output:
        "data/interim/{pid}/fitbit_calories_intraday_features/fitbit_calories_intraday_python_{provider_key}.csv"
    params:
        # The provider settings are looked up with the upper-cased {provider_key} wildcard
        provider = lambda wildcards: config["FITBIT_CALORIES_INTRADAY"]["PROVIDERS"][wildcards.provider_key.upper()],
        provider_key = "{provider_key}",
        sensor_key = "fitbit_calories_intraday"
    script:
        "../src/features/entry.py"
|
||||||
|
|
||||||
|
# Computes Fitbit calories intraday features with a provider implemented in R.
rule fitbit_calories_intraday_r_features:
    input:
        sensor_data = "data/raw/{pid}/fitbit_calories_intraday_with_datetime.csv",
        time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"
    output:
        "data/interim/{pid}/fitbit_calories_intraday_features/fitbit_calories_intraday_r_{provider_key}.csv"
    params:
        # The provider settings are looked up with the upper-cased {provider_key} wildcard
        provider = lambda wildcards: config["FITBIT_CALORIES_INTRADAY"]["PROVIDERS"][wildcards.provider_key.upper()],
        provider_key = "{provider_key}",
        sensor_key = "fitbit_calories_intraday"
    script:
        "../src/features/entry.R"
|
||||||
|
|
||||||
rule fitbit_data_yield_python_features:
|
rule fitbit_data_yield_python_features:
|
||||||
input:
|
input:
|
||||||
sensor_data = "data/raw/{pid}/fitbit_heartrate_intraday_with_datetime.csv",
|
sensor_data = "data/raw/{pid}/fitbit_heartrate_intraday_with_datetime.csv",
|
||||||
|
|
|
@ -107,7 +107,6 @@ get_participant_most_common_tz <- function(tz_codes_file, participant_file){
|
||||||
return(most_common_tz)
|
return(most_common_tz)
|
||||||
}
|
}
|
||||||
|
|
||||||
# TODO include CSV timezone file in rule
|
|
||||||
multiple_time_zone_assignment <- function(sensor_data, timezone_parameters, device_type, pid, participant_file){
|
multiple_time_zone_assignment <- function(sensor_data, timezone_parameters, device_type, pid, participant_file){
|
||||||
if(nrow(sensor_data) == 0)
|
if(nrow(sensor_data) == 0)
|
||||||
return(sensor_data %>% mutate(local_timezone = NA_character_))
|
return(sensor_data %>% mutate(local_timezone = NA_character_))
|
||||||
|
|
|
@ -87,3 +87,17 @@ FITBIT_STEPS_INTRADAY:
|
||||||
JSON_FITBIT_COLUMN: fitbit_data # string columnwith JSON objects
|
JSON_FITBIT_COLUMN: fitbit_data # string columnwith JSON objects
|
||||||
SCRIPTS: # List any python or r scripts that mutate your raw data
|
SCRIPTS: # List any python or r scripts that mutate your raw data
|
||||||
- src/data/streams/mutations/fitbit/parse_steps_intraday_json.py
|
- src/data/streams/mutations/fitbit/parse_steps_intraday_json.py
|
||||||
|
|
||||||
|
FITBIT_CALORIES_INTRADAY:
|
||||||
|
RAPIDS_COLUMN_MAPPINGS:
|
||||||
|
TIMESTAMP: FLAG_TO_MUTATE
|
||||||
|
DEVICE_ID: device_id
|
||||||
|
LOCAL_DATE_TIME: FLAG_TO_MUTATE
|
||||||
|
LEVEL: FLAG_TO_MUTATE
|
||||||
|
METS: FLAG_TO_MUTATE
|
||||||
|
VALUE: FLAG_TO_MUTATE
|
||||||
|
MUTATION:
|
||||||
|
COLUMN_MAPPINGS:
|
||||||
|
JSON_FITBIT_COLUMN: fitbit_data # string column with JSON objects
|
||||||
|
SCRIPTS: # List any python or r scripts that mutate your raw data
|
||||||
|
- src/data/streams/mutations/fitbit/parse_calories_intraday_json.py
|
||||||
|
|
|
@ -87,3 +87,17 @@ FITBIT_STEPS_INTRADAY:
|
||||||
JSON_FITBIT_COLUMN: fitbit_data # string columnwith JSON objects
|
JSON_FITBIT_COLUMN: fitbit_data # string columnwith JSON objects
|
||||||
SCRIPTS: # List any python or r scripts that mutate your raw data
|
SCRIPTS: # List any python or r scripts that mutate your raw data
|
||||||
- src/data/streams/mutations/fitbit/parse_steps_intraday_json.py
|
- src/data/streams/mutations/fitbit/parse_steps_intraday_json.py
|
||||||
|
|
||||||
|
FITBIT_CALORIES_INTRADAY:
|
||||||
|
RAPIDS_COLUMN_MAPPINGS:
|
||||||
|
TIMESTAMP: FLAG_TO_MUTATE
|
||||||
|
DEVICE_ID: device_id
|
||||||
|
LOCAL_DATE_TIME: FLAG_TO_MUTATE
|
||||||
|
LEVEL: FLAG_TO_MUTATE
|
||||||
|
METS: FLAG_TO_MUTATE
|
||||||
|
VALUE: FLAG_TO_MUTATE
|
||||||
|
MUTATION:
|
||||||
|
COLUMN_MAPPINGS:
|
||||||
|
JSON_FITBIT_COLUMN: fitbit_data # string column with JSON objects
|
||||||
|
SCRIPTS: # List any python or r scripts that mutate your raw data
|
||||||
|
- src/data/streams/mutations/fitbit/parse_calories_intraday_json.py
|
||||||
|
|
|
@ -81,3 +81,16 @@ FITBIT_STEPS_INTRADAY:
|
||||||
COLUMN_MAPPINGS:
|
COLUMN_MAPPINGS:
|
||||||
SCRIPTS: # List any python or r scripts that mutate your raw data
|
SCRIPTS: # List any python or r scripts that mutate your raw data
|
||||||
- src/data/streams/mutations/fitbit/add_zero_timestamp.py
|
- src/data/streams/mutations/fitbit/add_zero_timestamp.py
|
||||||
|
|
||||||
|
FITBIT_CALORIES_INTRADAY:
|
||||||
|
RAPIDS_COLUMN_MAPPINGS:
|
||||||
|
TIMESTAMP: FLAG_TO_MUTATE
|
||||||
|
DEVICE_ID: device_id
|
||||||
|
LOCAL_DATE_TIME: local_date_time
|
||||||
|
LEVEL: level
|
||||||
|
METS: mets
|
||||||
|
VALUE: value
|
||||||
|
MUTATION:
|
||||||
|
COLUMN_MAPPINGS:
|
||||||
|
SCRIPTS: # List any python or r scripts that mutate your raw data
|
||||||
|
- src/data/streams/mutations/fitbit/add_zero_timestamp.py
|
||||||
|
|
|
@ -81,3 +81,16 @@ FITBIT_STEPS_INTRADAY:
|
||||||
COLUMN_MAPPINGS:
|
COLUMN_MAPPINGS:
|
||||||
SCRIPTS: # List any python or r scripts that mutate your raw data
|
SCRIPTS: # List any python or r scripts that mutate your raw data
|
||||||
- src/data/streams/mutations/fitbit/add_zero_timestamp.py
|
- src/data/streams/mutations/fitbit/add_zero_timestamp.py
|
||||||
|
|
||||||
|
FITBIT_CALORIES_INTRADAY:
|
||||||
|
RAPIDS_COLUMN_MAPPINGS:
|
||||||
|
TIMESTAMP: FLAG_TO_MUTATE
|
||||||
|
DEVICE_ID: device_id
|
||||||
|
LOCAL_DATE_TIME: local_date_time
|
||||||
|
LEVEL: level
|
||||||
|
METS: mets
|
||||||
|
VALUE: value
|
||||||
|
MUTATION:
|
||||||
|
COLUMN_MAPPINGS:
|
||||||
|
SCRIPTS: # List any python or r scripts that mutate your raw data
|
||||||
|
- src/data/streams/mutations/fitbit/add_zero_timestamp.py
|
||||||
|
|
|
@ -0,0 +1,33 @@
|
||||||
|
import json
|
||||||
|
import pandas as pd
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
# Column order of the data frame returned by parseCaloriesData
CALORIES_INTRADAY_COLUMNS = ("device_id", "level", "mets", "value", "local_date_time", "timestamp")


def parseCaloriesData(calories_data):
    """Flatten Fitbit calories JSON records into one row per intraday entry.

    Parameters
    ----------
    calories_data : pandas.DataFrame
        Must provide a ``device_id`` column and a ``json_fitbit_column``
        column holding one JSON document (as text) per row.

    Returns
    -------
    pandas.DataFrame
        Columns follow ``CALORIES_INTRADAY_COLUMNS``. ``local_date_time`` is
        the record's ``dateTime`` combined with each entry's ``time``;
        ``timestamp`` is always 0. The frame has no rows when the input is
        empty or no record carries usable data.

    Notes
    -----
    Records lacking ``activities-calories`` or
    ``activities-calories-intraday`` are skipped. Records whose
    ``activities-calories`` list is empty are skipped as well, because there
    is no date to anchor the intraday times to (this used to raise
    ``IndexError``). The ``device_id`` of the first input row is used for
    every output row.
    """
    if calories_data.empty:
        return pd.DataFrame(columns=CALORIES_INTRADAY_COLUMNS)

    device_id = calories_data["device_id"].iloc[0]
    records_intraday = []

    # Parse each JSON document into individual intraday rows
    for raw_record in calories_data["json_fitbit_column"]:
        record = json.loads(raw_record)
        if "activities-calories" not in record or "activities-calories-intraday" not in record:
            continue

        daily_summary = record["activities-calories"]
        if not daily_summary:
            # Without a daily entry there is no date for the intraday times
            continue

        curr_date = datetime.strptime(daily_summary[0]["dateTime"], "%Y-%m-%d")
        for data in record["activities-calories-intraday"]["dataset"]:
            d_time = datetime.strptime(data["time"], "%H:%M:%S").time()
            d_datetime = datetime.combine(curr_date, d_time)
            records_intraday.append(
                (device_id, data["level"], data["mets"], data["value"], d_datetime, 0)
            )

    return pd.DataFrame(data=records_intraday, columns=CALORIES_INTRADAY_COLUMNS)
|
||||||
|
|
||||||
|
def main(json_raw, stream_parameters):
    """Mutation entry point: parse the raw Fitbit JSON rows into tabular calories data.

    ``stream_parameters`` is accepted but not used by this script.
    Returns the data frame produced by ``parseCaloriesData`` with
    ``timestamp`` zeroed, ``mets`` divided by 10 and, when it is a datetime
    column, ``local_date_time`` formatted as ``%Y-%m-%d %H:%M:%S`` text.
    """
    calories = parseCaloriesData(json_raw)

    # This column is added at readable_datetime.R because we need to take into account multiple timezones
    calories["timestamp"] = 0
    calories["mets"] = calories["mets"] / 10

    local_date_time = calories["local_date_time"]
    if pd.api.types.is_datetime64_any_dtype(local_date_time):
        calories["local_date_time"] = local_date_time.dt.strftime('%Y-%m-%d %H:%M:%S')

    return calories
|
|
@ -154,7 +154,7 @@ pull_phone_data <- function(){
|
||||||
infer_device_os_container <- container_functions$infer_device_os
|
infer_device_os_container <- container_functions$infer_device_os
|
||||||
pull_data_container <- container_functions$pull_data
|
pull_data_container <- container_functions$pull_data
|
||||||
|
|
||||||
for(idx in seq_along(devices)){ #TODO remove length
|
for(idx in seq_along(devices)){
|
||||||
|
|
||||||
device <- devices[idx]
|
device <- devices[idx]
|
||||||
message(paste0("\nProcessing ", sensor, " for ", device))
|
message(paste0("\nProcessing ", sensor, " for ", device))
|
||||||
|
|
|
@ -115,7 +115,7 @@ pull_wearable_data_main <- function(){
|
||||||
|
|
||||||
pull_data_container <- load_container_script(stream_container)
|
pull_data_container <- load_container_script(stream_container)
|
||||||
|
|
||||||
for(idx in seq_along(devices)){ #TODO remove length
|
for(idx in seq_along(devices)){
|
||||||
device <- devices[idx]
|
device <- devices[idx]
|
||||||
message(paste0("\nProcessing ", sensor, " for ", device))
|
message(paste0("\nProcessing ", sensor, " for ", device))
|
||||||
|
|
||||||
|
|
|
@ -181,6 +181,14 @@ FITBIT_STEPS_INTRADAY:
|
||||||
- LOCAL_DATE_TIME
|
- LOCAL_DATE_TIME
|
||||||
- STEPS
|
- STEPS
|
||||||
|
|
||||||
|
FITBIT_CALORIES_INTRADAY:
|
||||||
|
- TIMESTAMP
|
||||||
|
- DEVICE_ID
|
||||||
|
- LOCAL_DATE_TIME
|
||||||
|
- LEVEL
|
||||||
|
- METS
|
||||||
|
- VALUE
|
||||||
|
|
||||||
EMPATICA_ACCELEROMETER:
|
EMPATICA_ACCELEROMETER:
|
||||||
- TIMESTAMP
|
- TIMESTAMP
|
||||||
- DEVICE_ID
|
- DEVICE_ID
|
||||||
|
|
|
@ -0,0 +1,92 @@
|
||||||
|
source("renv/activate.R")
|
||||||
|
library(tidyverse)
|
||||||
|
library(lubridate)
|
||||||
|
library(glue)
|
||||||
|
|
||||||
|
# Build a zero-row tibble with the feature columns of one episode type.
#
# episode_type: suffix appended to every column name (e.g. "sedentary").
# Returns a tibble with integer count/start/end-time columns and numeric
# {sum,avg,min,max,std}{duration,calories,mets} columns.
#
# The statistic prefixes must match the ones episode_type_features() emits
# ("avg" and "std", not "mean" and "sd"); otherwise empty and non-empty results
# end up with different column names and the FEATURES selection misses them.
create_empty_dataframe <- function(episode_type){
    integer_columns <- c("countepisode{episode_type}", "starttimefirstepisode{episode_type}", "endtimefirstepisode{episode_type}", "starttimelastepisode{episode_type}", "endtimelastepisode{episode_type}", "starttimelongestepisode{episode_type}", "endtimelongestepisode{episode_type}")
    integer_columns <- sapply(integer_columns, function(x) glue(x), simplify = TRUE, USE.NAMES = FALSE)

    double_columns <- c()
    for(col in c("duration", "calories", "mets")){
        for(fun in c("sum", "avg", "min", "max", "std")){
            double_columns <- c(double_columns, glue("{fun}{col}episode{episode_type}"))
        }
    }

    # Named lists of zero-length vectors become zero-row typed columns
    as_tibble(c(sapply(integer_columns, function(x) integer()), sapply(double_columns, function(x) numeric())))
}
|
||||||
|
|
||||||
|
# Return the element of `time` at the position of the first maximum of `duration`.
longest <- function(duration, time){
    is_longest <- duration == max(duration)
    # Ties are resolved in favour of the earliest position
    first_longest <- min(which(is_longest))
    time[first_longest]
}
|
||||||
|
|
||||||
|
# Summarise the episodes of one type into a single row of features.
#
# data: rows of one episode type; the code reads `timestamp`, `mets`, `value`,
#       `time_since_ref` and the column named by `episode_id_column`.
# episode_type: suffix appended to every feature name.
# episode_id_column: name of the column that identifies each episode.
# Returns a one-row data frame, or the zero-row placeholder when `data` is empty.
episode_type_features <- function(data, episode_type, episode_id_column){
    if(nrow(data) == 0)
        return(create_empty_dataframe(episode_type))

    summary_functions <- list(sum=sum, avg=mean, min=min, max=max, std=sd)

    # First pass: one row per episode.
    # duration is the timestamp span / 60000 plus 1 (presumably ms to minutes, counting the last row -- confirm)
    episodes <- data %>%
        group_by(across(all_of(episode_id_column))) %>%
        summarise(duration = (max(timestamp) - min(timestamp)) / 60000 + 1,
                  mets = sum(mets),
                  calories = sum(value),
                  start_time = min(time_since_ref),
                  end_time = max(time_since_ref) + 1)

    # Second pass: collapse all episodes into one row of features
    episodes %>%
        summarise("countepisode{episode_type}" := n(),
                  "starttimefirstepisode{episode_type}" := first(start_time),
                  "endtimefirstepisode{episode_type}" := first(end_time),
                  "starttimelastepisode{episode_type}" := last(start_time),
                  "endtimelastepisode{episode_type}" := last(end_time),
                  "starttimelongestepisode{episode_type}" := longest(duration, start_time),
                  "endtimelongestepisode{episode_type}" := longest(duration, end_time),
                  across(c(duration, calories, mets), summary_functions, .names = "{.fn}{.col}episode{episode_type}"))
}
|
||||||
|
|
||||||
|
# Compute the RAPIDS Fitbit calories intraday features for one time segment.
#
# sensor_data_files: not used here; the data is read from snakemake@input[["sensor_data"]].
# time_segment: label passed to filter_data_by_segment().
# provider: provider settings; the code reads EPISODE_MET_THRESHOLD,
#           EPISODE_MVPA_CATEGORIES, EPISODE_TIME_THRESHOLD, EPISODE_REFERENCE_TIME,
#           EPISODE_TYPE and FEATURES.
# Returns a data frame with `local_segment` plus every column whose name starts
# with one of the requested FEATURES, for the requested EPISODE_TYPEs.
rapids_features <- function(sensor_data_files, time_segment, provider){

    calories <- read_csv(snakemake@input[["sensor_data"]],
                         col_types = cols_only(level="i", mets="d", value="d", local_date_time="T", assigned_segments="c", timestamp="d"))

    MET_THRESHOLD <- provider[["EPISODE_MET_THRESHOLD"]]
    MVPA_LABELS <- provider[["EPISODE_MVPA_CATEGORIES"]]
    FITBIT_LEVELS <- c("sedentary", "lightlyactive", "fairlyactive", "veryactive")
    # Numeric level codes (0-3, position in FITBIT_LEVELS minus 1) that count as MVPA
    MVPA_LEVELS <- which(FITBIT_LEVELS %in% MVPA_LABELS) - 1
    EPISODE_TIME_THRESHOLD <- provider[["EPISODE_TIME_THRESHOLD"]]
    EPISODE_REFERENCE_TIME <- provider[["EPISODE_REFERENCE_TIME"]]
    REQUESTED_EPISODES <- provider[["EPISODE_TYPE"]]
    REQUESTED_FEATURES <- provider[["FEATURES"]]

    calories <- calories %>% filter_data_by_segment(time_segment)

    # No rows in this segment: return the empty feature columns only
    if(nrow(calories) == 0)
        return(bind_cols(lapply(REQUESTED_EPISODES, function(episode_type) episode_type_features(calories, episode_type, ""))) %>%
                   add_column(local_segment = character(), .before = 1) %>%
                   select(starts_with(c("local_segment", REQUESTED_FEATURES))))

    calories <- calories %>%
        extract(timestamps_segment, regex = "(\\d*),", into = c("segment_start_ts"), remove = TRUE, convert = TRUE) %>%
        arrange(timestamp) %>%
        # A new episode id starts when the gap to the previous row exceeds the
        # threshold or when the label (level / MVPA flag / MET flag) changes
        mutate(consecutive = c(0,diff(timestamp) / 60000),
               level_diff = c(0, diff(level)),
               mvpa_diff = c(1, diff(if_else(level %in% MVPA_LEVELS, 1, 0))),
               met_diff = c(1, diff(if_else(mets >= MET_THRESHOLD, 1, 0))),
               level_episode_id = cumsum(consecutive > EPISODE_TIME_THRESHOLD | level_diff != 0),
               mvpa_episode_id = cumsum(consecutive > EPISODE_TIME_THRESHOLD | mvpa_diff != 0),
               met_episode_id = cumsum(consecutive > EPISODE_TIME_THRESHOLD | met_diff != 0),
               time_since_ref = case_when(EPISODE_REFERENCE_TIME == "MIDNIGHT" ~ ((hour(local_date_time) *3600) + (minute(local_date_time) * 60) + second(local_date_time))/60,
                                          EPISODE_REFERENCE_TIME == "START_OF_THE_SEGMENT" ~ (timestamp - segment_start_ts) / 60000)
        ) %>%
        select(-consecutive, -level_diff, -mvpa_diff, -met_diff) %>%
        group_by(local_segment) %>%
        nest() %>%
        mutate(sedentary = map(data, ~ episode_type_features(.x %>% filter(level == 0) , "sedentary", "level_episode_id")),
               lightlyactive = map(data, ~ episode_type_features(.x %>% filter(level == 1) , "lightlyactive", "level_episode_id")),
               fairlyactive = map(data, ~ episode_type_features(.x %>% filter(level == 2) , "fairlyactive", "level_episode_id")),
               veryactive = map(data, ~ episode_type_features(.x %>% filter(level == 3) , "veryactive", "level_episode_id")),
               # Keep the rows in step with mvpa_episode_id: both follow EPISODE_MVPA_CATEGORIES
               # (a hard-coded `level >= 2` ignored the configured categories)
               mvpa = map(data, ~ episode_type_features(.x %>% filter(level %in% MVPA_LEVELS) , "mvpa", "mvpa_episode_id")),
               lowmet = map(data, ~ episode_type_features(.x %>% filter(mets < MET_THRESHOLD) , "lowmet", "met_episode_id")),
               highmet = map(data, ~ episode_type_features(.x %>% filter(mets >= MET_THRESHOLD) , "highmet", "met_episode_id"))
        ) %>%
        ungroup() %>%
        select(all_of(c("local_segment", REQUESTED_EPISODES))) %>%
        unnest(everything(), keep_empty=TRUE) %>%
        select(starts_with(c("local_segment", REQUESTED_FEATURES)))
}
|
|
@ -11,6 +11,10 @@ filter_data_by_segment <- function(data, time_segment){
|
||||||
mutate(local_segment = str_extract(assigned_segments, paste0("\\[", time_segment, "#", datetime_regex, ",", datetime_regex, ";", timestamp_regex, ",", timestamp_regex, "\\]"))) %>%
|
mutate(local_segment = str_extract(assigned_segments, paste0("\\[", time_segment, "#", datetime_regex, ",", datetime_regex, ";", timestamp_regex, ",", timestamp_regex, "\\]"))) %>%
|
||||||
extract(local_segment, into = c("local_segment", "timestamps_segment"), paste0("\\[(", time_segment, "#", datetime_regex, ",", datetime_regex, ");(", timestamp_regex, ",", timestamp_regex, ")\\]")) %>%
|
extract(local_segment, into = c("local_segment", "timestamps_segment"), paste0("\\[(", time_segment, "#", datetime_regex, ",", datetime_regex, ");(", timestamp_regex, ",", timestamp_regex, ")\\]")) %>%
|
||||||
select(-assigned_segments)
|
select(-assigned_segments)
|
||||||
|
|
||||||
|
# chunk episodes
|
||||||
|
if (nrow(data) > 0 && all(c("start_timestamp","end_timestamp") %in% colnames(data)) )
|
||||||
|
data <- chunk_episodes(data)
|
||||||
return(data)
|
return(data)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1125,6 +1125,47 @@ properties:
|
||||||
INCLUDE_ZERO_STEP_ROWS:
|
INCLUDE_ZERO_STEP_ROWS:
|
||||||
type: boolean
|
type: boolean
|
||||||
|
|
||||||
|
|
||||||
|
FITBIT_CALORIES_INTRADAY:
|
||||||
|
type: object
|
||||||
|
required: [CONTAINER, PROVIDERS]
|
||||||
|
properties:
|
||||||
|
CONTAINER:
|
||||||
|
type: string
|
||||||
|
PROVIDERS:
|
||||||
|
type: ["null", object]
|
||||||
|
properties:
|
||||||
|
RAPIDS:
|
||||||
|
allOf:
|
||||||
|
- $ref: "#/definitions/PROVIDER"
|
||||||
|
- properties:
|
||||||
|
FEATURES:
|
||||||
|
uniqueItems: True
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
enum: [count, sumduration, avgduration, minduration, maxduration, stdduration, starttimefirst, endtimefirst, starttimelast, endtimelast, starttimelongest, endtimelongest, summet, avgmet, maxmet, minmet, stdmet, sumcalories, avgcalories, maxcalories, mincalories, stdcalories]
|
||||||
|
EPISODE_TYPE:
|
||||||
|
uniqueItems: True
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
enum: [sedentary, lightlyactive, fairlyactive, veryactive, mvpa, lowmet, highmet]
|
||||||
|
EPISODE_TIME_THRESHOLD:
|
||||||
|
type: integer
|
||||||
|
minimum: 1
|
||||||
|
EPISODE_MET_THRESHOLD:
|
||||||
|
type: integer
|
||||||
|
minimum: 1
|
||||||
|
EPISODE_MVPA_CATEGORIES:
|
||||||
|
uniqueItems: True
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
enum: [sedentary, lightlyactive, fairlyactive, veryactive]
|
||||||
|
EPISODE_REFERENCE_TIME:
|
||||||
|
type: string
|
||||||
|
enum: [MIDNIGHT, START_OF_THE_SEGMENT]
|
||||||
|
additionalProperties:
|
||||||
|
$ref: "#/definitions/PROVIDER"
|
||||||
|
|
||||||
HISTOGRAM_PHONE_DATA_YIELD:
|
HISTOGRAM_PHONE_DATA_YIELD:
|
||||||
type: object
|
type: object
|
||||||
required: [PLOT]
|
required: [PLOT]
|
||||||
|
|
Loading…
Reference in New Issue