Add calories intraday features

2021-04-16 18:02:43 -04:00 · 2021-04-16 18:02:43 -04:00 · 9c56422529
parent 20910bf1dc
commit 9c56422529
17 changed files with 351 additions and 3 deletions
--- a/9
+++ b/9
@ -217,6 +217,15 @@ for provider in config["PHONE_LOCATIONS"]["PROVIDERS"].keys():
        files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
        files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
 # Queue the FITBIT_CALORIES_INTRADAY targets for every provider whose COMPUTE flag is set.
 for provider in config["FITBIT_CALORIES_INTRADAY"]["PROVIDERS"]:
    calories_provider = config["FITBIT_CALORIES_INTRADAY"]["PROVIDERS"][provider]
    if not calories_provider["COMPUTE"]:
        continue
    # Per-participant raw data, first as pulled and then with datetime columns.
    for raw_target in ("data/raw/{pid}/fitbit_calories_intraday_raw.csv", "data/raw/{pid}/fitbit_calories_intraday_with_datetime.csv"):
        files_to_compute.extend(expand(raw_target, pid=config["PIDS"]))
    # Provider-level features; the file name encodes the script language and the provider key.
    files_to_compute.extend(
        expand(
            "data/interim/{pid}/fitbit_calories_intraday_features/fitbit_calories_intraday_{language}_{provider_key}.csv",
            pid=config["PIDS"],
            language=get_script_language(calories_provider["SRC_SCRIPT"]),
            provider_key=provider.lower(),
        )
    )
    # Per-participant sensor features and the merged feature files.
    for feature_target in ("data/processed/features/{pid}/fitbit_calories_intraday.csv", "data/processed/features/{pid}/all_sensor_features.csv"):
        files_to_compute.extend(expand(feature_target, pid=config["PIDS"]))
    files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
 for provider in config["FITBIT_DATA_YIELD"]["PROVIDERS"].keys():
    if config["FITBIT_DATA_YIELD"]["PROVIDERS"][provider]["COMPUTE"]:
        files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_intraday_raw.csv", pid=config["PIDS"]))
--- a/config.yaml
+++ b/config.yaml
@ -337,6 +337,19 @@ FITBIT_DATA_STREAMS:
 # Sensors ------
 FITBIT_CALORIES_INTRADAY:
  CONTAINER: fitbit_data
  PROVIDERS:
    RAPIDS:
      COMPUTE: False
      EPISODE_TYPE: [sedentary, lightlyactive, fairlyactive, veryactive, mvpa, lowmet, highmet]
      EPISODE_TIME_THRESHOLD: 5 # minutes
      EPISODE_MET_THRESHOLD: 3
      EPISODE_MVPA_CATEGORIES: [fairlyactive, veryactive]
      EPISODE_REFERENCE_TIME: MIDNIGHT # or START_OF_THE_SEGMENT
      FEATURES: [count, sumduration, avgduration, minduration, maxduration, stdduration, starttimefirst, endtimefirst, starttimelast, endtimelast, starttimelongest, endtimelongest, summet, avgmet, maxmet, minmet, stdmet, sumcalories, avgcalories, maxcalories, mincalories, stdcalories]
      SRC_SCRIPT: src/features/fitbit_calories_intraday/rapids/main.R
 # See https://www.rapids.science/latest/features/fitbit-data-yield/
 FITBIT_DATA_YIELD:
  SENSOR: FITBIT_HEARTRATE_INTRADAY
--- a/docs/features/fitbit-calories-intraday.md
+++ b/docs/features/fitbit-calories-intraday.md
@ -0,0 +1,68 @@
 # Fitbit Calories Intraday
 Sensor parameters description for `[FITBIT_CALORIES_INTRADAY]`:
 |Key&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;            | Description |
 |----------------|-----------------------------------------------------------------------------------------------------------------------------------
 |`[CONTAINER]`| Container where your calories intraday data is stored, depending on the data stream you are using this can be a database table, a CSV file, etc. |
 ## RAPIDS provider
 !!! info "Available time segments"
    - Available for all time segments
 !!! info "File Sequence"
    ```bash
    - data/raw/{pid}/fitbit_calories_intraday_raw.csv
    - data/raw/{pid}/fitbit_calories_intraday_with_datetime.csv
    - data/interim/{pid}/fitbit_calories_intraday_features/fitbit_calories_intraday_{language}_{provider_key}.csv
    - data/processed/features/{pid}/fitbit_calories_intraday.csv
    ```
 Parameters description for `[FITBIT_CALORIES_INTRADAY][PROVIDERS][RAPIDS]`:
 |Key&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;            | Description |
 |----------------|-----------------------------------------------------------------------------------------------------------------------------------
 |`[COMPUTE]`  | Set to `True` to extract `FITBIT_CALORIES_INTRADAY` features from the `RAPIDS` provider|
 |`[FEATURES]` |         Features to be computed from calories intraday data, see table below          |
 |`[EPISODE_TYPE]` |    RAPIDS will compute features for any episodes in this list. There are seven types of episodes defined as consecutive appearances of a label. Four are based on the activity level labels provided by Fitbit: `sedentary`, `lightly active`, `fairly active`, and `very active`. One is defined by RAPIDS as moderate to vigorous physical activity `MVPA` episodes that are based on all `fairly active`, and `very active`  labels. Two are defined by the user based on a threshold that divides low or high MET (metabolic equivalent) episodes.        |
 |`[EPISODE_TIME_THRESHOLD]` | Any consecutive rows of the same `[EPISODE_TYPE]` will be considered a single episode if the time difference between them is less than or equal to this threshold in minutes|
 |`[EPISODE_MET_THRESHOLD]` |    Any 1-minute calorie data chunk with a MET value equal to or higher than this threshold will be considered part of a high MET episode, and part of a low MET episode otherwise. The default value is 3|
 |`[EPISODE_MVPA_CATEGORIES]` |    The Fitbit level labels that are considered part of a moderate to vigorous physical activity episode. One or more of `sedentary`, `lightly active`, `fairly active`, and `very active`. The defaults are `fairly active` and `very active`|
 |`[EPISODE_REFERENCE_TIME]` |   Reference time for the start/end time features. `MIDNIGHT` sets the reference time to 00:00 of each day, `START_OF_THE_SEGMENT` sets the reference time to the start of the time segment (useful when a segment is shorter than a day or spans multiple days)|
 Features description for `[FITBIT_CALORIES_INTRADAY][PROVIDERS][RAPIDS]`:
 |Feature&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;                    |Units      |Description|
 |-------------------------- |---------- |---------------------------|
 |starttimefirstepisode`EPISODE_TYPE`               |minutes     |Start time of the first episode of type `[EPISODE_TYPE]`
 |endtimefirstepisode`EPISODE_TYPE`               |minutes     |End time of the first episode of type `[EPISODE_TYPE]`
 |starttimelastepisode`EPISODE_TYPE`               |minutes     |Start time of the last episode of type `[EPISODE_TYPE]`
 |endtimelastepisode`EPISODE_TYPE`               |minutes     |End time of the last episode of type `[EPISODE_TYPE]`
 |starttimelongestepisode`EPISODE_TYPE`               |minutes     |Start time of the longest episode of type `[EPISODE_TYPE]`
 |endtimelongestepisode`EPISODE_TYPE`               |minutes     |End time of the longest episode of type `[EPISODE_TYPE]`
 |countepisode`EPISODE_TYPE`               |episodes     |The number of episodes of type `[EPISODE_TYPE]`
 |sumdurationepisode`EPISODE_TYPE`               |minutes     |The sum of the duration of episodes of type `[EPISODE_TYPE]`
 |avgdurationepisode`EPISODE_TYPE`               |minutes     |The average of the duration of episodes of type `[EPISODE_TYPE]`
 |maxdurationepisode`EPISODE_TYPE`               |minutes     |The maximum of the duration of episodes of type `[EPISODE_TYPE]`
 |mindurationepisode`EPISODE_TYPE`               |minutes     |The minimum of the duration of episodes of type `[EPISODE_TYPE]`
 |stddurationepisode`EPISODE_TYPE`               |minutes     |The standard deviation of the duration of episodes of type `[EPISODE_TYPE]`
 |summetsepisode`EPISODE_TYPE`               |METs     |The sum of all METs during episodes of type `[EPISODE_TYPE]`
 |avgmetsepisode`EPISODE_TYPE`               |METs     |The average of all METs during episodes of type `[EPISODE_TYPE]`
 |maxmetsepisode`EPISODE_TYPE`               |METs     |The maximum of all METs during episodes of type `[EPISODE_TYPE]`
 |minmetsepisode`EPISODE_TYPE`               |METs     |The minimum of all METs during episodes of type `[EPISODE_TYPE]`
 |stdmetsepisode`EPISODE_TYPE`               |METs     |The standard deviation of all METs during episodes of type `[EPISODE_TYPE]`
 |sumcaloriesepisode`EPISODE_TYPE`               |calories     |The sum of all calories during episodes of type `[EPISODE_TYPE]`
 |avgcaloriesepisode`EPISODE_TYPE`               |calories     |The average of all calories during episodes of type `[EPISODE_TYPE]`
 |maxcaloriesepisode`EPISODE_TYPE`               |calories     |The maximum of all calories during episodes of type `[EPISODE_TYPE]`
 |mincaloriesepisode`EPISODE_TYPE`               |calories     |The minimum of all calories during episodes of type `[EPISODE_TYPE]`
 |stdcaloriesepisode`EPISODE_TYPE`               |calories     |The standard deviation of all calories during episodes of type `[EPISODE_TYPE]`
 !!! note "Assumptions/Observations"
    - These features are based on intraday calories data that is usually obtained in 1-minute chunks from Fitbit's API.
    - The MET value returned by Fitbit's API is divided by 10 when RAPIDS parses the raw JSON data.
    - Take into account that the [intraday data returned by Fitbit](https://dev.fitbit.com/build/reference/web-api/activity/#get-activity-intraday-time-series) can contain time series for calories burned inclusive of BMR, tracked activity, and manually logged activities.
--- a/mkdocs.yml
+++ b/mkdocs.yml
@ -119,6 +119,7 @@ nav:
        - Phone WiFI Connected: features/phone-wifi-connected.md
        - Phone WiFI Visible: features/phone-wifi-visible.md
      - Fitbit:
        - Fitbit Calories Intraday: features/fitbit-calories-intraday.md
        - Fitbit Data Yield: features/fitbit-data-yield.md
        - Fitbit Heart Rate Summary: features/fitbit-heartrate-summary.md
        - Fitbit Heart Rate Intraday: features/fitbit-heartrate-intraday.md
--- a/rules/features.smk
+++ b/rules/features.smk
@ -516,6 +516,32 @@ rule phone_wifi_visible_r_features:
    script:
        "../src/features/entry.R"
 # Compute FITBIT_CALORIES_INTRADAY features for one participant ({pid}) and one
 # provider ({provider_key}) by running the Python entry script.
 # Inputs: the participant's calories intraday data with datetime columns and the
 # participant's time segment labels.
 # Output: one interim CSV per participant and provider, tagged "python" in its name.
 rule fitbit_calories_intraday_python_features:
    input:
        sensor_data = "data/raw/{pid}/fitbit_calories_intraday_with_datetime.csv",
        time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"
    params:
        # The provider section of config.yaml is looked up with the upper-cased provider_key wildcard
        provider = lambda wildcards: config["FITBIT_CALORIES_INTRADAY"]["PROVIDERS"][wildcards.provider_key.upper()],
        provider_key = "{provider_key}",
        sensor_key = "fitbit_calories_intraday"
    output:
        "data/interim/{pid}/fitbit_calories_intraday_features/fitbit_calories_intraday_python_{provider_key}.csv"
    script:
        "../src/features/entry.py"
 # Compute FITBIT_CALORIES_INTRADAY features for one participant ({pid}) and one
 # provider ({provider_key}) by running the R entry script.
 # Inputs: the participant's calories intraday data with datetime columns and the
 # participant's time segment labels.
 # Output: one interim CSV per participant and provider, tagged "r" in its name.
 rule fitbit_calories_intraday_r_features:
    input:
        sensor_data = "data/raw/{pid}/fitbit_calories_intraday_with_datetime.csv",
        time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"
    params:
        # The provider section of config.yaml is looked up with the upper-cased provider_key wildcard
        provider = lambda wildcards: config["FITBIT_CALORIES_INTRADAY"]["PROVIDERS"][wildcards.provider_key.upper()],
        provider_key = "{provider_key}",
        sensor_key = "fitbit_calories_intraday"
    output:
        "data/interim/{pid}/fitbit_calories_intraday_features/fitbit_calories_intraday_r_{provider_key}.csv"
    script:
        "../src/features/entry.R"
 rule fitbit_data_yield_python_features:
    input:
        sensor_data = "data/raw/{pid}/fitbit_heartrate_intraday_with_datetime.csv",
--- a/src/data/datetime/assign_to_multiple_timezones.R
+++ b/src/data/datetime/assign_to_multiple_timezones.R
@ -107,7 +107,6 @@ get_participant_most_common_tz <- function(tz_codes_file, participant_file){
  return(most_common_tz)
 }
 # TODO include CSV timezone file in rule
 multiple_time_zone_assignment <- function(sensor_data, timezone_parameters, device_type, pid, participant_file){
  if(nrow(sensor_data) == 0)
    return(sensor_data %>% mutate(local_timezone = NA_character_))
--- a/src/data/streams/fitbitjson_csv/format.yaml
+++ b/src/data/streams/fitbitjson_csv/format.yaml
@ -87,3 +87,17 @@ FITBIT_STEPS_INTRADAY:
      JSON_FITBIT_COLUMN: fitbit_data # string columnwith JSON objects
    SCRIPTS: # List any python or r scripts that mutate your raw data
      - src/data/streams/mutations/fitbit/parse_steps_intraday_json.py
 FITBIT_CALORIES_INTRADAY:
  RAPIDS_COLUMN_MAPPINGS:
    TIMESTAMP: FLAG_TO_MUTATE
    DEVICE_ID: device_id
    LOCAL_DATE_TIME: FLAG_TO_MUTATE
    LEVEL: FLAG_TO_MUTATE
    METS: FLAG_TO_MUTATE
    VALUE: FLAG_TO_MUTATE
  MUTATION:
    COLUMN_MAPPINGS:
      JSON_FITBIT_COLUMN: fitbit_data # string column with JSON objects
    SCRIPTS: # List any python or r scripts that mutate your raw data
      - src/data/streams/mutations/fitbit/parse_calories_intraday_json.py
--- a/src/data/streams/fitbitjson_mysql/format.yaml
+++ b/src/data/streams/fitbitjson_mysql/format.yaml
@ -87,3 +87,17 @@ FITBIT_STEPS_INTRADAY:
      JSON_FITBIT_COLUMN: fitbit_data # string columnwith JSON objects
    SCRIPTS: # List any python or r scripts that mutate your raw data
      - src/data/streams/mutations/fitbit/parse_steps_intraday_json.py
 FITBIT_CALORIES_INTRADAY:
  RAPIDS_COLUMN_MAPPINGS:
    TIMESTAMP: FLAG_TO_MUTATE
    DEVICE_ID: device_id
    LOCAL_DATE_TIME: FLAG_TO_MUTATE
    LEVEL: FLAG_TO_MUTATE
    METS: FLAG_TO_MUTATE
    VALUE: FLAG_TO_MUTATE
  MUTATION:
    COLUMN_MAPPINGS:
      JSON_FITBIT_COLUMN: fitbit_data # string column with JSON objects
    SCRIPTS: # List any python or r scripts that mutate your raw data
      - src/data/streams/mutations/fitbit/parse_calories_intraday_json.py
--- a/src/data/streams/fitbitparsed_csv/format.yaml
+++ b/src/data/streams/fitbitparsed_csv/format.yaml
@ -81,3 +81,16 @@ FITBIT_STEPS_INTRADAY:
    COLUMN_MAPPINGS:
    SCRIPTS: # List any python or r scripts that mutate your raw data
      - src/data/streams/mutations/fitbit/add_zero_timestamp.py
 FITBIT_CALORIES_INTRADAY:
  RAPIDS_COLUMN_MAPPINGS:
    TIMESTAMP: FLAG_TO_MUTATE
    DEVICE_ID: device_id
    LOCAL_DATE_TIME: local_date_time
    LEVEL: level
    METS: mets
    VALUE: value
  MUTATION:
    COLUMN_MAPPINGS:
    SCRIPTS: # List any python or r scripts that mutate your raw data
      - src/data/streams/mutations/fitbit/add_zero_timestamp.py
--- a/src/data/streams/fitbitparsed_mysql/format.yaml
+++ b/src/data/streams/fitbitparsed_mysql/format.yaml
@ -81,3 +81,16 @@ FITBIT_STEPS_INTRADAY:
    COLUMN_MAPPINGS:
    SCRIPTS: # List any python or r scripts that mutate your raw data
      - src/data/streams/mutations/fitbit/add_zero_timestamp.py
 FITBIT_CALORIES_INTRADAY:
  RAPIDS_COLUMN_MAPPINGS:
    TIMESTAMP: FLAG_TO_MUTATE
    DEVICE_ID: device_id
    LOCAL_DATE_TIME: local_date_time
    LEVEL: level
    METS: mets
    VALUE: value
  MUTATION:
    COLUMN_MAPPINGS:
    SCRIPTS: # List any python or r scripts that mutate your raw data
      - src/data/streams/mutations/fitbit/add_zero_timestamp.py
--- a/src/data/streams/mutations/fitbit/parse_calories_intraday_json.py
+++ b/src/data/streams/mutations/fitbit/parse_calories_intraday_json.py
@ -0,0 +1,33 @@
 import json
 import pandas as pd
 from datetime import datetime
 CALORIES_INTRADAY_COLUMNS = ("device_id", "level", "mets", "value", "local_date_time", "timestamp")
 def parseCaloriesData(calories_data):
    """Flatten Fitbit calories JSON records into one row per intraday sample.

    Each entry of ``calories_data.json_fitbit_column`` is decoded as JSON.
    Entries that lack either the "activities-calories" or the
    "activities-calories-intraday" key are ignored. Every sample of the
    intraday dataset becomes a row whose ``local_date_time`` combines the
    record's date with the sample's time of day. ``device_id`` is taken from
    the first input row and ``timestamp`` is filled with 0.

    Returns a DataFrame with the columns in CALORIES_INTRADAY_COLUMNS; it has
    no rows when the input is empty or no record qualifies.
    """
    if calories_data.empty:
        return pd.DataFrame(columns=CALORIES_INTRADAY_COLUMNS)
    device_id = calories_data["device_id"].iloc[0]
    rows = []
    for raw_json in calories_data.json_fitbit_column:
        payload = json.loads(raw_json)
        if "activities-calories" not in payload or "activities-calories-intraday" not in payload:
            continue
        # The daily summary carries the date; intraday samples only carry a time of day
        day = datetime.strptime(payload["activities-calories"][0]["dateTime"], "%Y-%m-%d")
        rows.extend(
            (
                device_id,
                sample["level"],
                sample["mets"],
                sample["value"],
                datetime.combine(day, datetime.strptime(sample["time"], '%H:%M:%S').time()),
                0,
            )
            for sample in payload["activities-calories-intraday"]["dataset"]
        )
    return pd.DataFrame(data=rows, columns=CALORIES_INTRADAY_COLUMNS)
 def main(json_raw, stream_parameters):
    """Parse raw Fitbit calories JSON and shape it for the rest of the pipeline.

    ``json_raw`` is handed to ``parseCaloriesData``; ``stream_parameters`` is
    accepted but not read here. The returned DataFrame has ``timestamp`` set
    to 0, ``mets`` divided by 10, and ``local_date_time`` rendered as
    "%Y-%m-%d %H:%M:%S" text whenever the column holds datetimes.
    """
    calories = parseCaloriesData(json_raw)
    # Placeholder only: this column is added at readable_datetime.R because
    # multiple timezones need to be taken into account
    calories["timestamp"] = 0
    calories["mets"] = calories["mets"] / 10
    local_date_time = calories['local_date_time']
    if pd.api.types.is_datetime64_any_dtype(local_date_time):
        calories['local_date_time'] = local_date_time.dt.strftime('%Y-%m-%d %H:%M:%S')
    return calories
--- a/src/data/streams/pull_phone_data.R
+++ b/src/data/streams/pull_phone_data.R
@ -154,7 +154,7 @@ pull_phone_data <- function(){
  infer_device_os_container <- container_functions$infer_device_os
  pull_data_container <- container_functions$pull_data
-  for(idx in seq_along(devices)){ #TODO remove length
+  for(idx in seq_along(devices)){ 
    device <- devices[idx]
    message(paste0("\nProcessing ", sensor, " for ", device))
--- a/src/data/streams/pull_wearable_data.R
+++ b/src/data/streams/pull_wearable_data.R
@ -115,7 +115,7 @@ pull_wearable_data_main <- function(){
  pull_data_container <- load_container_script(stream_container)
-  for(idx in seq_along(devices)){ #TODO remove length    
+  for(idx in seq_along(devices)){ 
    device <- devices[idx]
    message(paste0("\nProcessing ", sensor, " for ", device))
--- a/src/data/streams/rapids_columns.yaml
+++ b/src/data/streams/rapids_columns.yaml
@ -181,6 +181,14 @@ FITBIT_STEPS_INTRADAY:
  - LOCAL_DATE_TIME
  - STEPS
 FITBIT_CALORIES_INTRADAY:
  - TIMESTAMP
  - DEVICE_ID
  - LOCAL_DATE_TIME
  - LEVEL
  - METS
  - VALUE
 EMPATICA_ACCELEROMETER:
  - TIMESTAMP
  - DEVICE_ID
--- a/src/features/fitbit_calories_intraday/rapids/main.R
+++ b/src/features/fitbit_calories_intraday/rapids/main.R
@ -0,0 +1,92 @@
 source("renv/activate.R")
 library(tidyverse)
 library(lubridate)
 library(glue)
 create_empty_dataframe <- function(episode_type){
  # Build a zero-row tibble holding every feature column of one episode type.
  # It stands in for the features of `episode_type` when there is no data to summarise.
  #
  # episode_type: label appended to every column name (e.g. "sedentary").
  # Returns: a tibble with 0 rows, integer count/start/end columns and double statistic columns.
  integer_columns <- c("countepisode{episode_type}", "starttimefirstepisode{episode_type}", "endtimefirstepisode{episode_type}", "starttimelastepisode{episode_type}", "endtimelastepisode{episode_type}", "starttimelongestepisode{episode_type}", "endtimelongestepisode{episode_type}")
  integer_columns <- sapply(integer_columns, function(x) glue(x), simplify = TRUE, USE.NAMES = FALSE)
  double_columns <- c()
  # The statistic prefixes must be the ones episode_type_features() emits (sum, avg, min, max, std);
  # using the R function names (mean, sd) would create columns that never match the requested features.
  for(col in c("duration", "calories", "mets"))
    for(fun in c("sum", "avg", "min","max","std"))
      double_columns <- c(double_columns, glue("{fun}{col}episode{episode_type}"))
  # Zero-length vectors named after each column give a tibble with columns but no rows
  as_tibble(c(sapply(integer_columns, function(x) integer()), sapply(double_columns, function(x) numeric())))
 }
 longest <- function(duration, time){
  # Return the element of `time` that sits at the position of the largest `duration`.
  # When several durations tie for the maximum, the earliest position is used.
  is_longest <- duration == max(duration)
  time[min(which(is_longest))]
 }
 episode_type_features <- function(data, episode_type, episode_id_column){
  # Summarise all episodes of one type into a single row of features.
  #
  # data: rows already filtered to `episode_type`; reads timestamp, mets, value,
  #       time_since_ref and the column named by `episode_id_column`.
  # episode_type: label appended to every output column name.
  # episode_id_column: name of the column that identifies each episode.
  # Returns: a one-row tibble, or the empty frame from create_empty_dataframe() when `data` has no rows.
  if(nrow(data) == 0){
    return(create_empty_dataframe(episode_type))
  }

  # Step 1: collapse the rows of each episode into one row.
  # The "+ 1" terms count the last sample itself as part of the episode.
  per_episode <- data %>%
    group_by(across(all_of(episode_id_column))) %>%
    summarise(duration = (max(timestamp) - min(timestamp)) / 60000 + 1,
              mets = sum(mets),
              calories = sum(value),
              start_time = min(time_since_ref),
              end_time = max(time_since_ref) + 1)

  # Step 2: collapse all episodes into one row of features.
  # A single across() walks duration, calories and mets in that order, producing
  # the sum/avg/min/max/std columns of each variable consecutively.
  per_episode %>%
    summarise("countepisode{episode_type}" := n(),
              "starttimefirstepisode{episode_type}" := first(start_time),
              "endtimefirstepisode{episode_type}" := first(end_time),
              "starttimelastepisode{episode_type}" := last(start_time),
              "endtimelastepisode{episode_type}" := last(end_time),
              "starttimelongestepisode{episode_type}" := longest(duration, start_time),
              "endtimelongestepisode{episode_type}" := longest(duration, end_time),
              across(c(duration, calories, mets), list(sum=sum, avg=mean, min=min,max=max,std=sd), .names = "{.fn}{.col}episode{episode_type}"))
 }
 rapids_features <- function(sensor_data_files, time_segment, provider){
    # Compute the Fitbit calories intraday episode features for one time segment.
    #
    # sensor_data_files: not read in this function.
    #   NOTE(review): the CSV path comes from the global `snakemake` object instead; confirm this is intended.
    # time_segment: segment label passed to filter_data_by_segment().
    # provider: provider configuration; reads EPISODE_MET_THRESHOLD, EPISODE_MVPA_CATEGORIES,
    #   EPISODE_TIME_THRESHOLD, EPISODE_REFERENCE_TIME, EPISODE_TYPE and FEATURES.
    # Returns: a tibble with `local_segment` plus the columns whose names start with a requested feature.
    calories <- read_csv(snakemake@input[["sensor_data"]], 
                          col_types = cols_only(level="i", mets="d", value="d", local_date_time="T",assigned_segments="c", timestamp="d"))

    MET_THRESHOLD <- provider[["EPISODE_MET_THRESHOLD"]]
    MVPA_LABELS <- provider[["EPISODE_MVPA_CATEGORIES"]]
    FITBIT_LEVELS <- c("sedentary", "lightlyactive", "fairlyactive", "veryactive")
    # Levels are matched as 0-based codes in the order of FITBIT_LEVELS, hence the "- 1"
    MVPA_LEVELS <- which(FITBIT_LEVELS %in% MVPA_LABELS) - 1
    EPISODE_TIME_THRESHOLD <- provider[["EPISODE_TIME_THRESHOLD"]]
    EPISODE_REFERENCE_TIME <- provider[["EPISODE_REFERENCE_TIME"]]
    REQUESTED_EPISODES <-  provider[["EPISODE_TYPE"]]
    REQUESTED_FEATURES <- provider[["FEATURES"]]

    calories <- calories %>% filter_data_by_segment(time_segment) 

    # No rows in this segment: return the empty feature frame of every requested episode type
    if(nrow(calories) == 0)
      return(bind_cols(lapply(REQUESTED_EPISODES, function(episode_type) episode_type_features(calories, episode_type, ""))) %>% 
              add_column(local_segment = character(), .before = 1) %>%
              select(starts_with(c("local_segment", REQUESTED_FEATURES))))

    calories <- calories %>% 
      # The digits before the comma in timestamps_segment are the segment start timestamp
      extract(timestamps_segment, regex = "(\\d*),", into = c("segment_start_ts"), remove = TRUE, convert = TRUE) %>%
      arrange(timestamp) %>% 
      # A new episode id starts whenever the gap to the previous row exceeds the time
      # threshold (in minutes) or the level / MVPA membership / MET class changes
      mutate(consecutive = c(0,diff(timestamp) / 60000),
            level_diff = c(0, diff(level)),
            mvpa_diff = c(1, diff(if_else(level %in% MVPA_LEVELS, 1, 0))),
            met_diff = c(1, diff(if_else(mets >= MET_THRESHOLD, 1, 0))),
            level_episode_id = cumsum(consecutive > EPISODE_TIME_THRESHOLD | level_diff != 0),
            mvpa_episode_id = cumsum(consecutive > EPISODE_TIME_THRESHOLD | mvpa_diff != 0),
            met_episode_id = cumsum(consecutive > EPISODE_TIME_THRESHOLD | met_diff != 0),
            # Minutes since local midnight, or minutes since the start of the segment
            time_since_ref = case_when(EPISODE_REFERENCE_TIME == "MIDNIGHT" ~ ((hour(local_date_time) *3600) + (minute(local_date_time) * 60) + second(local_date_time))/60,
                                        EPISODE_REFERENCE_TIME == "START_OF_THE_SEGMENT" ~ (timestamp - segment_start_ts) / 60000)
            ) %>% 
      select(-consecutive, -level_diff, -mvpa_diff, -met_diff) %>% 
      group_by(local_segment) %>%
      nest() %>%
      mutate(sedentary = map(data, ~ episode_type_features(.x %>% filter(level == 0) , "sedentary", "level_episode_id")),
            lightlyactive = map(data, ~ episode_type_features(.x %>% filter(level == 1) , "lightlyactive", "level_episode_id")),
            fairlyactive = map(data, ~ episode_type_features(.x %>% filter(level == 2) , "fairlyactive", "level_episode_id")),
            veryactive = map(data, ~ episode_type_features(.x %>% filter(level == 3) , "veryactive", "level_episode_id")),
            # Use the configured MVPA levels (the same set that drives mvpa_episode_id) instead of a
            # hard-coded "level >= 2", so EPISODE_MVPA_CATEGORIES is honoured
            mvpa = map(data, ~ episode_type_features(.x %>% filter(level %in% MVPA_LEVELS) , "mvpa", "mvpa_episode_id")),
            lowmet = map(data, ~ episode_type_features(.x %>% filter(mets < MET_THRESHOLD) , "lowmet", "met_episode_id")),
            highmet = map(data, ~ episode_type_features(.x %>% filter(mets >= MET_THRESHOLD) , "highmet", "met_episode_id"))
            ) %>% 
      ungroup() %>% 
      # Keep only the requested episode types, then only the requested features
      select(all_of(c("local_segment", REQUESTED_EPISODES))) %>% 
      unnest(everything(), keep_empty=TRUE) %>% 
      select(starts_with(c("local_segment", REQUESTED_FEATURES)))
 }
--- a/src/features/utils/utils.R
+++ b/src/features/utils/utils.R
@ -11,6 +11,10 @@ filter_data_by_segment <- function(data, time_segment){
    mutate(local_segment = str_extract(assigned_segments, paste0("\\[", time_segment, "#", datetime_regex, ",", datetime_regex, ";", timestamp_regex, ",", timestamp_regex, "\\]"))) %>% 
    extract(local_segment, into = c("local_segment", "timestamps_segment"), paste0("\\[(", time_segment, "#", datetime_regex, ",", datetime_regex, ");(", timestamp_regex, ",", timestamp_regex, ")\\]")) %>% 
    select(-assigned_segments)
  # chunk episodes
  if (nrow(data) > 0 && all(c("start_timestamp","end_timestamp") %in% colnames(data)) )
      data <- chunk_episodes(data)
  return(data)
 }
--- a/tools/config.schema.yaml
+++ b/tools/config.schema.yaml
@ -1125,6 +1125,47 @@ properties:
                  INCLUDE_ZERO_STEP_ROWS:
                    type: boolean    
  FITBIT_CALORIES_INTRADAY:
    type: object
    required: [CONTAINER, PROVIDERS]
    properties:
      CONTAINER:
        type: string
      PROVIDERS:
        type: ["null", object]
        properties:
          RAPIDS:
            allOf:
              - $ref: "#/definitions/PROVIDER"
              - properties:
                  FEATURES:
                    uniqueItems: True
                    items:
                      type: string
                      enum: [count, sumduration, avgduration, minduration, maxduration, stdduration, starttimefirst, endtimefirst, starttimelast, endtimelast, starttimelongest, endtimelongest, summet, avgmet, maxmet, minmet, stdmet, sumcalories, avgcalories, maxcalories, mincalories, stdcalories]
                  EPISODE_TYPE:
                    uniqueItems: True
                    items:
                      type: string
                      enum: [sedentary, lightlyactive, fairlyactive, veryactive, mvpa, lowmet, highmet]
                  EPISODE_TIME_THRESHOLD: 
                    type: integer
                    minimum: 1
                  EPISODE_MET_THRESHOLD: 
                    type: integer
                    minimum: 1
                  EPISODE_MVPA_CATEGORIES: 
                    uniqueItems: True
                    items:
                      type: string
                      enum: [sedentary, lightlyactive, fairlyactive, veryactive]
                  EPISODE_REFERENCE_TIME:
                    type: string
                    enum: [MIDNIGHT, START_OF_THE_SEGMENT]
        additionalProperties: 
          $ref: "#/definitions/PROVIDER"
  HISTOGRAM_PHONE_DATA_YIELD:
    type: object
    required: [PLOT]