Add calories intraday features
parent
20910bf1dc
commit
9c56422529
|
@ -217,6 +217,15 @@ for provider in config["PHONE_LOCATIONS"]["PROVIDERS"].keys():
|
|||
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
|
||||
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
|
||||
|
||||
# Fitbit calories intraday: for every provider whose COMPUTE flag is set, request
# the raw file, the datetime-annotated file, the per-provider interim features,
# the per-sensor features and the merged feature files.
for provider in config["FITBIT_CALORIES_INTRADAY"]["PROVIDERS"]:
    if not config["FITBIT_CALORIES_INTRADAY"]["PROVIDERS"][provider]["COMPUTE"]:
        continue
    files_to_compute.extend(
        expand("data/raw/{pid}/fitbit_calories_intraday_raw.csv", pid=config["PIDS"])
    )
    files_to_compute.extend(
        expand("data/raw/{pid}/fitbit_calories_intraday_with_datetime.csv", pid=config["PIDS"])
    )
    # The interim file name encodes the script language and the lower-cased provider key.
    files_to_compute.extend(
        expand(
            "data/interim/{pid}/fitbit_calories_intraday_features/fitbit_calories_intraday_{language}_{provider_key}.csv",
            pid=config["PIDS"],
            language=get_script_language(config["FITBIT_CALORIES_INTRADAY"]["PROVIDERS"][provider]["SRC_SCRIPT"]),
            provider_key=provider.lower(),
        )
    )
    files_to_compute.extend(
        expand("data/processed/features/{pid}/fitbit_calories_intraday.csv", pid=config["PIDS"])
    )
    files_to_compute.extend(
        expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"])
    )
    files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
|
||||
|
||||
for provider in config["FITBIT_DATA_YIELD"]["PROVIDERS"].keys():
|
||||
if config["FITBIT_DATA_YIELD"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_intraday_raw.csv", pid=config["PIDS"]))
|
||||
|
|
13
config.yaml
13
config.yaml
|
@ -337,6 +337,19 @@ FITBIT_DATA_STREAMS:
|
|||
|
||||
# Sensors ------
|
||||
|
||||
FITBIT_CALORIES_INTRADAY:
|
||||
CONTAINER: fitbit_data
|
||||
PROVIDERS:
|
||||
RAPIDS:
|
||||
COMPUTE: False
|
||||
EPISODE_TYPE: [sedentary, lightlyactive, fairlyactive, veryactive, mvpa, lowmet, highmet]
|
||||
EPISODE_TIME_THRESHOLD: 5 # minutes
|
||||
EPISODE_MET_THRESHOLD: 3
|
||||
EPISODE_MVPA_CATEGORIES: [fairlyactive, veryactive]
|
||||
EPISODE_REFERENCE_TIME: MIDNIGHT # or START_OF_THE_SEGMENT
|
||||
FEATURES: [count, sumduration, avgduration, minduration, maxduration, stdduration, starttimefirst, endtimefirst, starttimelast, endtimelast, starttimelongest, endtimelongest, summet, avgmet, maxmet, minmet, stdmet, sumcalories, avgcalories, maxcalories, mincalories, stdcalories]
|
||||
SRC_SCRIPT: src/features/fitbit_calories_intraday/rapids/main.R
|
||||
|
||||
# See https://www.rapids.science/latest/features/fitbit-data-yield/
|
||||
FITBIT_DATA_YIELD:
|
||||
SENSOR: FITBIT_HEARTRATE_INTRADAY
|
||||
|
|
|
@ -0,0 +1,68 @@
|
|||
# Fitbit Calories Intraday
|
||||
|
||||
Sensor parameters description for `[FITBIT_CALORIES_INTRADAY]`:
|
||||
|
||||
|Key | Description |
|
||||
|----------------|-----------------------------------------------------------------------------------------------------------------------------------
|
||||
|`[CONTAINER]`| Container where your calories intraday data is stored, depending on the data stream you are using this can be a database table, a CSV file, etc. |
|
||||
|
||||
|
||||
## RAPIDS provider
|
||||
|
||||
!!! info "Available time segments"
|
||||
- Available for all time segments
|
||||
|
||||
!!! info "File Sequence"
|
||||
```bash
|
||||
- data/raw/{pid}/fitbit_calories_intraday_raw.csv
|
||||
- data/raw/{pid}/fitbit_calories_intraday_with_datetime.csv
|
||||
- data/interim/{pid}/fitbit_calories_intraday_features/fitbit_calories_intraday_{language}_{provider_key}.csv
|
||||
- data/processed/features/{pid}/fitbit_calories_intraday.csv
|
||||
```
|
||||
|
||||
|
||||
Parameters description for `[FITBIT_CALORIES_INTRADAY][PROVIDERS][RAPIDS]`:
|
||||
|
||||
|Key | Description |
|
||||
|----------------|-----------------------------------------------------------------------------------------------------------------------------------
|
||||
|`[COMPUTE]` | Set to `True` to extract `FITBIT_CALORIES_INTRADAY` features from the `RAPIDS` provider|
|
||||
|`[FEATURES]` | Features to be computed from calories intraday data, see table below |
|
||||
|`[EPISODE_TYPE]` | RAPIDS will compute features for any episodes in this list. There are seven types of episodes defined as consecutive appearances of a label. Four are based on the activity level labels provided by Fitbit: `sedentary`, `lightly active`, `fairly active`, and `very active`. One is defined by RAPIDS as moderate to vigorous physical activity `MVPA` episodes that are based on all `fairly active`, and `very active` labels. Two are defined by the user based on a threshold that divides low or high MET (metabolic equivalent) episodes. |
|
||||
|`[EPISODE_TIME_THRESHOLD]` | Any consecutive rows of the same `[EPISODE_TYPE]` will be considered a single episode if the time difference between them is less than or equal to this threshold in minutes|
|
||||
|`[EPISODE_MET_THRESHOLD]` | Any 1-minute calorie data chunk with a MET value equal to or higher than this threshold will be considered part of a high MET episode, and part of a low MET episode otherwise. The default value is 3|
|
||||
|`[EPISODE_MVPA_CATEGORIES]` | The Fitbit level labels that are considered part of a moderate to vigorous physical activity episode. One or more of `sedentary`, `lightlyactive`, `fairlyactive`, and `veryactive`. The defaults are `fairlyactive` and `veryactive`|
|
||||
|`[EPISODE_REFERENCE_TIME]` | Reference time for the start/end time features. `MIDNIGHT` sets the reference time to 00:00 of each day, `START_OF_THE_SEGMENT` sets the reference time to the start of the time segment (useful when a segment is shorter than a day or spans multiple days)|
|
||||
|
||||
|
||||
Features description for `[FITBIT_CALORIES_INTRADAY][PROVIDERS][RAPIDS]`:
|
||||
|
||||
|Feature |Units |Description|
|-------------------------- |---------- |---------------------------|
|starttimefirstepisode`EPISODE_TYPE` |minutes |Start time of the first episode of type `[EPISODE_TYPE]`
|endtimefirstepisode`EPISODE_TYPE` |minutes |End time of the first episode of type `[EPISODE_TYPE]`
|starttimelastepisode`EPISODE_TYPE` |minutes |Start time of the last episode of type `[EPISODE_TYPE]`
|endtimelastepisode`EPISODE_TYPE` |minutes |End time of the last episode of type `[EPISODE_TYPE]`
|starttimelongestepisode`EPISODE_TYPE` |minutes |Start time of the longest episode of type `[EPISODE_TYPE]`
|endtimelongestepisode`EPISODE_TYPE` |minutes |End time of the longest episode of type `[EPISODE_TYPE]`
|countepisode`EPISODE_TYPE` |episodes |The number of episodes of type `[EPISODE_TYPE]`
|sumdurationepisode`EPISODE_TYPE` |minutes |The sum of the duration of episodes of type `[EPISODE_TYPE]`
|avgdurationepisode`EPISODE_TYPE` |minutes |The average of the duration of episodes of type `[EPISODE_TYPE]`
|maxdurationepisode`EPISODE_TYPE` |minutes |The maximum of the duration of episodes of type `[EPISODE_TYPE]`
|mindurationepisode`EPISODE_TYPE` |minutes |The minimum of the duration of episodes of type `[EPISODE_TYPE]`
|stddurationepisode`EPISODE_TYPE` |minutes |The standard deviation of the duration of episodes of type `[EPISODE_TYPE]`
|summetsepisode`EPISODE_TYPE` |METs |The sum of all METs during episodes of type `[EPISODE_TYPE]`
|avgmetsepisode`EPISODE_TYPE` |METs |The average of all METs during episodes of type `[EPISODE_TYPE]`
|maxmetsepisode`EPISODE_TYPE` |METs |The maximum of all METs during episodes of type `[EPISODE_TYPE]`
|minmetsepisode`EPISODE_TYPE` |METs |The minimum of all METs during episodes of type `[EPISODE_TYPE]`
|stdmetsepisode`EPISODE_TYPE` |METs |The standard deviation of all METs during episodes of type `[EPISODE_TYPE]`
|sumcaloriesepisode`EPISODE_TYPE` |calories |The sum of all calories during episodes of type `[EPISODE_TYPE]`
|avgcaloriesepisode`EPISODE_TYPE` |calories |The average of all calories during episodes of type `[EPISODE_TYPE]`
|maxcaloriesepisode`EPISODE_TYPE` |calories |The maximum of all calories during episodes of type `[EPISODE_TYPE]`
|mincaloriesepisode`EPISODE_TYPE` |calories |The minimum of all calories during episodes of type `[EPISODE_TYPE]`
|stdcaloriesepisode`EPISODE_TYPE` |calories |The standard deviation of all calories during episodes of type `[EPISODE_TYPE]`
|
||||
|
||||
|
||||
!!! note "Assumptions/Observations"
|
||||
- These features are based on intraday calories data that is usually obtained in 1-minute chunks from Fitbit's API.
|
||||
- The MET value returned by Fitbit is divided by 10
|
||||
- Take into account that the [intraday data returned by Fitbit](https://dev.fitbit.com/build/reference/web-api/activity/#get-activity-intraday-time-series) can contain time series for calories burned inclusive of BMR, tracked activity, and manually logged activities.
|
|
@ -119,6 +119,7 @@ nav:
|
|||
- Phone WiFI Connected: features/phone-wifi-connected.md
|
||||
- Phone WiFI Visible: features/phone-wifi-visible.md
|
||||
- Fitbit:
|
||||
- Fitbit Calories Intraday: features/fitbit-calories-intraday.md
|
||||
- Fitbit Data Yield: features/fitbit-data-yield.md
|
||||
- Fitbit Heart Rate Summary: features/fitbit-heartrate-summary.md
|
||||
- Fitbit Heart Rate Intraday: features/fitbit-heartrate-intraday.md
|
||||
|
|
|
@ -516,6 +516,32 @@ rule phone_wifi_visible_r_features:
|
|||
script:
|
||||
"../src/features/entry.R"
|
||||
|
||||
# Runs the Python feature entry script for FITBIT_CALORIES_INTRADAY.
# Inputs are the participant's calories data with datetime columns and the
# participant's time segment labels. The provider settings are looked up in
# config with the upper-cased {provider_key} wildcard taken from the output path.
rule fitbit_calories_intraday_python_features:
    input:
        sensor_data = "data/raw/{pid}/fitbit_calories_intraday_with_datetime.csv",
        time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"
    params:
        # Resolved lazily because the provider key is only known once wildcards are matched
        provider = lambda wildcards: config["FITBIT_CALORIES_INTRADAY"]["PROVIDERS"][wildcards.provider_key.upper()],
        provider_key = "{provider_key}",
        sensor_key = "fitbit_calories_intraday"
    output:
        "data/interim/{pid}/fitbit_calories_intraday_features/fitbit_calories_intraday_python_{provider_key}.csv"
    script:
        "../src/features/entry.py"
|
||||
|
||||
# Runs the R feature entry script for FITBIT_CALORIES_INTRADAY.
# Inputs are the participant's calories data with datetime columns and the
# participant's time segment labels. The provider settings are looked up in
# config with the upper-cased {provider_key} wildcard taken from the output path.
rule fitbit_calories_intraday_r_features:
    input:
        sensor_data = "data/raw/{pid}/fitbit_calories_intraday_with_datetime.csv",
        time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"
    params:
        # Resolved lazily because the provider key is only known once wildcards are matched
        provider = lambda wildcards: config["FITBIT_CALORIES_INTRADAY"]["PROVIDERS"][wildcards.provider_key.upper()],
        provider_key = "{provider_key}",
        sensor_key = "fitbit_calories_intraday"
    output:
        "data/interim/{pid}/fitbit_calories_intraday_features/fitbit_calories_intraday_r_{provider_key}.csv"
    script:
        "../src/features/entry.R"
|
||||
|
||||
rule fitbit_data_yield_python_features:
|
||||
input:
|
||||
sensor_data = "data/raw/{pid}/fitbit_heartrate_intraday_with_datetime.csv",
|
||||
|
|
|
@ -107,7 +107,6 @@ get_participant_most_common_tz <- function(tz_codes_file, participant_file){
|
|||
return(most_common_tz)
|
||||
}
|
||||
|
||||
# TODO include CSV timezone file in rule
|
||||
multiple_time_zone_assignment <- function(sensor_data, timezone_parameters, device_type, pid, participant_file){
|
||||
if(nrow(sensor_data) == 0)
|
||||
return(sensor_data %>% mutate(local_timezone = NA_character_))
|
||||
|
|
|
@ -87,3 +87,17 @@ FITBIT_STEPS_INTRADAY:
|
|||
JSON_FITBIT_COLUMN: fitbit_data # string column with JSON objects
|
||||
SCRIPTS: # List any python or r scripts that mutate your raw data
|
||||
- src/data/streams/mutations/fitbit/parse_steps_intraday_json.py
|
||||
|
||||
FITBIT_CALORIES_INTRADAY:
|
||||
RAPIDS_COLUMN_MAPPINGS:
|
||||
TIMESTAMP: FLAG_TO_MUTATE
|
||||
DEVICE_ID: device_id
|
||||
LOCAL_DATE_TIME: FLAG_TO_MUTATE
|
||||
LEVEL: FLAG_TO_MUTATE
|
||||
METS: FLAG_TO_MUTATE
|
||||
VALUE: FLAG_TO_MUTATE
|
||||
MUTATION:
|
||||
COLUMN_MAPPINGS:
|
||||
JSON_FITBIT_COLUMN: fitbit_data # string column with JSON objects
|
||||
SCRIPTS: # List any python or r scripts that mutate your raw data
|
||||
- src/data/streams/mutations/fitbit/parse_calories_intraday_json.py
|
||||
|
|
|
@ -87,3 +87,17 @@ FITBIT_STEPS_INTRADAY:
|
|||
JSON_FITBIT_COLUMN: fitbit_data # string column with JSON objects
|
||||
SCRIPTS: # List any python or r scripts that mutate your raw data
|
||||
- src/data/streams/mutations/fitbit/parse_steps_intraday_json.py
|
||||
|
||||
FITBIT_CALORIES_INTRADAY:
|
||||
RAPIDS_COLUMN_MAPPINGS:
|
||||
TIMESTAMP: FLAG_TO_MUTATE
|
||||
DEVICE_ID: device_id
|
||||
LOCAL_DATE_TIME: FLAG_TO_MUTATE
|
||||
LEVEL: FLAG_TO_MUTATE
|
||||
METS: FLAG_TO_MUTATE
|
||||
VALUE: FLAG_TO_MUTATE
|
||||
MUTATION:
|
||||
COLUMN_MAPPINGS:
|
||||
JSON_FITBIT_COLUMN: fitbit_data # string column with JSON objects
|
||||
SCRIPTS: # List any python or r scripts that mutate your raw data
|
||||
- src/data/streams/mutations/fitbit/parse_calories_intraday_json.py
|
||||
|
|
|
@ -81,3 +81,16 @@ FITBIT_STEPS_INTRADAY:
|
|||
COLUMN_MAPPINGS:
|
||||
SCRIPTS: # List any python or r scripts that mutate your raw data
|
||||
- src/data/streams/mutations/fitbit/add_zero_timestamp.py
|
||||
|
||||
FITBIT_CALORIES_INTRADAY:
|
||||
RAPIDS_COLUMN_MAPPINGS:
|
||||
TIMESTAMP: FLAG_TO_MUTATE
|
||||
DEVICE_ID: device_id
|
||||
LOCAL_DATE_TIME: local_date_time
|
||||
LEVEL: level
|
||||
METS: mets
|
||||
VALUE: value
|
||||
MUTATION:
|
||||
COLUMN_MAPPINGS:
|
||||
SCRIPTS: # List any python or r scripts that mutate your raw data
|
||||
- src/data/streams/mutations/fitbit/add_zero_timestamp.py
|
||||
|
|
|
@ -81,3 +81,16 @@ FITBIT_STEPS_INTRADAY:
|
|||
COLUMN_MAPPINGS:
|
||||
SCRIPTS: # List any python or r scripts that mutate your raw data
|
||||
- src/data/streams/mutations/fitbit/add_zero_timestamp.py
|
||||
|
||||
FITBIT_CALORIES_INTRADAY:
|
||||
RAPIDS_COLUMN_MAPPINGS:
|
||||
TIMESTAMP: FLAG_TO_MUTATE
|
||||
DEVICE_ID: device_id
|
||||
LOCAL_DATE_TIME: local_date_time
|
||||
LEVEL: level
|
||||
METS: mets
|
||||
VALUE: value
|
||||
MUTATION:
|
||||
COLUMN_MAPPINGS:
|
||||
SCRIPTS: # List any python or r scripts that mutate your raw data
|
||||
- src/data/streams/mutations/fitbit/add_zero_timestamp.py
|
||||
|
|
|
@ -0,0 +1,33 @@
|
|||
import json
|
||||
import pandas as pd
|
||||
from datetime import datetime
|
||||
|
||||
# Column order of the parsed calories intraday table.
CALORIES_INTRADAY_COLUMNS = ("device_id", "level", "mets", "value", "local_date_time", "timestamp")


def parseCaloriesData(calories_data):
    """Flatten Fitbit calories JSON records into one row per intraday sample.

    Args:
        calories_data: DataFrame with a ``device_id`` column and a
            ``json_fitbit_column`` column holding one JSON string per row.

    Returns:
        A DataFrame with the columns in ``CALORIES_INTRADAY_COLUMNS``. The
        ``device_id`` of the first input row is used for every output row,
        ``local_date_time`` combines the record's date with each sample's time,
        and ``timestamp`` is filled with 0.

    Records that lack the ``activities-calories`` or
    ``activities-calories-intraday`` keys are skipped, as are records whose
    ``activities-calories`` list is empty.
    """
    if calories_data.empty:
        return pd.DataFrame(columns=CALORIES_INTRADAY_COLUMNS)

    device_id = calories_data["device_id"].iloc[0]
    records_intraday = []

    for raw_record in calories_data.json_fitbit_column:
        record = json.loads(raw_record)
        if "activities-calories" not in record or "activities-calories-intraday" not in record:
            continue

        daily_summary = record["activities-calories"]
        if not daily_summary:
            # Without a daily entry there is no date to anchor the intraday
            # times to; skip instead of raising IndexError.
            continue

        curr_date = datetime.strptime(daily_summary[0]["dateTime"], "%Y-%m-%d")
        for data in record["activities-calories-intraday"]["dataset"]:
            d_time = datetime.strptime(data["time"], "%H:%M:%S").time()
            d_datetime = datetime.combine(curr_date, d_time)
            records_intraday.append(
                (device_id, data["level"], data["mets"], data["value"], d_datetime, 0)
            )

    return pd.DataFrame(data=records_intraday, columns=CALORIES_INTRADAY_COLUMNS)
|
||||
|
||||
def main(json_raw, stream_parameters):
    """Parse raw Fitbit calories JSON rows and normalise the resulting table.

    Args:
        json_raw: DataFrame handed to ``parseCaloriesData``.
        stream_parameters: Not read by this function.

    Returns:
        The parsed DataFrame with ``timestamp`` set to 0, ``mets`` divided by
        10, and ``local_date_time`` rendered as ``%Y-%m-%d %H:%M:%S`` strings
        when the column holds datetimes.
    """
    calories = parseCaloriesData(json_raw)

    # Placeholder only: the real timestamp is added at readable_datetime.R
    # because multiple time zones need to be taken into account.
    calories["timestamp"] = 0
    calories["mets"] = calories["mets"] / 10

    local_date_time = calories['local_date_time']
    if pd.api.types.is_datetime64_any_dtype(local_date_time):
        calories['local_date_time'] = local_date_time.dt.strftime('%Y-%m-%d %H:%M:%S')

    return calories
|
|
@ -154,7 +154,7 @@ pull_phone_data <- function(){
|
|||
infer_device_os_container <- container_functions$infer_device_os
|
||||
pull_data_container <- container_functions$pull_data
|
||||
|
||||
for(idx in seq_along(devices)){ #TODO remove length
|
||||
for(idx in seq_along(devices)){
|
||||
|
||||
device <- devices[idx]
|
||||
message(paste0("\nProcessing ", sensor, " for ", device))
|
||||
|
|
|
@ -115,7 +115,7 @@ pull_wearable_data_main <- function(){
|
|||
|
||||
pull_data_container <- load_container_script(stream_container)
|
||||
|
||||
for(idx in seq_along(devices)){ #TODO remove length
|
||||
for(idx in seq_along(devices)){
|
||||
device <- devices[idx]
|
||||
message(paste0("\nProcessing ", sensor, " for ", device))
|
||||
|
||||
|
|
|
@ -181,6 +181,14 @@ FITBIT_STEPS_INTRADAY:
|
|||
- LOCAL_DATE_TIME
|
||||
- STEPS
|
||||
|
||||
FITBIT_CALORIES_INTRADAY:
|
||||
- TIMESTAMP
|
||||
- DEVICE_ID
|
||||
- LOCAL_DATE_TIME
|
||||
- LEVEL
|
||||
- METS
|
||||
- VALUE
|
||||
|
||||
EMPATICA_ACCELEROMETER:
|
||||
- TIMESTAMP
|
||||
- DEVICE_ID
|
||||
|
|
|
@ -0,0 +1,92 @@
|
|||
source("renv/activate.R")
|
||||
library(tidyverse)
|
||||
library(lubridate)
|
||||
library(glue)
|
||||
|
||||
create_empty_dataframe <- function(episode_type){
  # Build a zero-row tibble with the same column names, in the same order, that
  # episode_type_features() produces for `episode_type`.
  #
  # episode_type: episode label appended to every column name.
  # Returns a tibble with zero rows: integer columns for the count and
  # start/end time features, numeric columns for the summary statistics.
  integer_prefixes <- c("count", "starttimefirst", "endtimefirst", "starttimelast",
                        "endtimelast", "starttimelongest", "endtimelongest")
  integer_columns <- paste0(integer_prefixes, "episode", episode_type)

  # The statistic prefixes must match the names used in episode_type_features()
  # (sum/avg/min/max/std, not the R function names mean/sd); otherwise empty
  # segments yield columns that requested features such as avgduration or
  # stdduration can never select.
  stat_prefixes <- c("sum", "avg", "min", "max", "std")
  measures <- c("duration", "calories", "mets")
  double_columns <- paste0(rep(stat_prefixes, times = length(measures)),
                           rep(measures, each = length(stat_prefixes)),
                           "episode", episode_type)

  empty_columns <- c(setNames(rep(list(integer()), length(integer_columns)), integer_columns),
                     setNames(rep(list(numeric()), length(double_columns)), double_columns))
  as_tibble(empty_columns)
}
|
||||
|
||||
longest <- function(duration, time){
  # Return the element of `time` at the position of the first maximum of `duration`.
  is_longest <- duration == max(duration)
  first_longest <- min(which(is_longest))
  time[first_longest]
}
|
||||
|
||||
episode_type_features <- function(data, episode_type, episode_id_column){
  # Summarise the rows of one episode type into a single row of features.
  #
  # data: rows with timestamp, mets, value, time_since_ref and the episode id column
  # episode_type: label appended to every feature name
  # episode_id_column: name of the column whose values identify each episode
  # Returns a one-row tibble of features, or the zero-row tibble from
  # create_empty_dataframe() when `data` has no rows.
  if(nrow(data) == 0)
    return(create_empty_dataframe(episode_type))

  summary_functions <- list(sum = sum, avg = mean, min = min, max = max, std = sd)

  # First pass: one row per episode
  episodes <- data %>%
    group_by(across(all_of(episode_id_column))) %>%
    summarise(duration = (max(timestamp) - min(timestamp)) / 60000 + 1,
              mets = sum(mets),
              calories = sum(value),
              start_time = min(time_since_ref),
              end_time = max(time_since_ref) + 1)

  # Second pass: collapse all episodes into one row of features
  episodes %>%
    summarise("countepisode{episode_type}" := n(),
              "starttimefirstepisode{episode_type}" := first(start_time),
              "endtimefirstepisode{episode_type}" := first(end_time),
              "starttimelastepisode{episode_type}" := last(start_time),
              "endtimelastepisode{episode_type}" := last(end_time),
              "starttimelongestepisode{episode_type}" := longest(duration, start_time),
              "endtimelongestepisode{episode_type}" := longest(duration, end_time),
              across(c(duration, calories, mets), summary_functions, .names = "{.fn}{.col}episode{episode_type}"))
}
|
||||
|
||||
rapids_features <- function(sensor_data_files, time_segment, provider){
  # Compute episode-based calories intraday features for one time segment.
  #
  # sensor_data_files: not read here; the data path comes from snakemake@input[["sensor_data"]]
  # time_segment: segment label passed to filter_data_by_segment()
  # provider: provider settings; the EPISODE_* keys and FEATURES are read from it
  # Returns a tibble with local_segment plus every column whose name starts with
  # one of the requested FEATURES, for the requested EPISODE_TYPE entries.
  calories <- read_csv(snakemake@input[["sensor_data"]],
                       col_types = cols_only(level="i", mets="d", value="d", local_date_time="T", assigned_segments="c", timestamp="d"))
  MET_THRESHOLD <- provider[["EPISODE_MET_THRESHOLD"]]
  MVPA_LABELS <- provider[["EPISODE_MVPA_CATEGORIES"]]
  FITBIT_LEVELS <- c("sedentary", "lightlyactive", "fairlyactive", "veryactive")
  # Numeric level codes (0-3, position in FITBIT_LEVELS minus one) of the labels that count as MVPA
  MVPA_LEVELS <- which(FITBIT_LEVELS %in% MVPA_LABELS) - 1
  EPISODE_TIME_THRESHOLD <- provider[["EPISODE_TIME_THRESHOLD"]]
  EPISODE_REFERENCE_TIME <- provider[["EPISODE_REFERENCE_TIME"]]
  REQUESTED_EPISODES <- provider[["EPISODE_TYPE"]]
  REQUESTED_FEATURES <- provider[["FEATURES"]]

  calories <- calories %>% filter_data_by_segment(time_segment)

  # No rows in this segment: return the empty feature columns for every requested episode type
  if(nrow(calories) == 0)
    return(bind_cols(lapply(REQUESTED_EPISODES, function(episode_type) episode_type_features(calories, episode_type, ""))) %>%
             add_column(local_segment = character(), .before = 1) %>%
             select(starts_with(c("local_segment", REQUESTED_FEATURES))))

  calories <- calories %>%
    extract(timestamps_segment, regex = "(\\d*),", into = c("segment_start_ts"), remove = TRUE, convert = TRUE) %>%
    arrange(timestamp) %>%
    # A new episode id starts when the gap to the previous row exceeds the time
    # threshold (in minutes) or when the level / MVPA / MET category changes
    mutate(consecutive = c(0,diff(timestamp) / 60000),
           level_diff = c(0, diff(level)),
           mvpa_diff = c(1, diff(if_else(level %in% MVPA_LEVELS, 1, 0))),
           met_diff = c(1, diff(if_else(mets >= MET_THRESHOLD, 1, 0))),
           level_episode_id = cumsum(consecutive > EPISODE_TIME_THRESHOLD | level_diff != 0),
           mvpa_episode_id = cumsum(consecutive > EPISODE_TIME_THRESHOLD | mvpa_diff != 0),
           met_episode_id = cumsum(consecutive > EPISODE_TIME_THRESHOLD | met_diff != 0),
           time_since_ref = case_when(EPISODE_REFERENCE_TIME == "MIDNIGHT" ~ ((hour(local_date_time) *3600) + (minute(local_date_time) * 60) + second(local_date_time))/60,
                                      EPISODE_REFERENCE_TIME == "START_OF_THE_SEGMENT" ~ (timestamp - segment_start_ts) / 60000)
    ) %>%
    select(-consecutive, -level_diff, -mvpa_diff, -met_diff) %>%
    group_by(local_segment) %>%
    nest() %>%
    mutate(sedentary = map(data, ~ episode_type_features(.x %>% filter(level == 0) , "sedentary", "level_episode_id")),
           lightlyactive = map(data, ~ episode_type_features(.x %>% filter(level == 1) , "lightlyactive", "level_episode_id")),
           fairlyactive = map(data, ~ episode_type_features(.x %>% filter(level == 2) , "fairlyactive", "level_episode_id")),
           veryactive = map(data, ~ episode_type_features(.x %>% filter(level == 3) , "veryactive", "level_episode_id")),
           # Filter with the same MVPA levels used to build mvpa_episode_id so that
           # EPISODE_MVPA_CATEGORIES is honoured (identical to level >= 2 for the default categories)
           mvpa = map(data, ~ episode_type_features(.x %>% filter(level %in% MVPA_LEVELS) , "mvpa", "mvpa_episode_id")),
           lowmet = map(data, ~ episode_type_features(.x %>% filter(mets < MET_THRESHOLD) , "lowmet", "met_episode_id")),
           highmet = map(data, ~ episode_type_features(.x %>% filter(mets >= MET_THRESHOLD) , "highmet", "met_episode_id"))
    ) %>%
    ungroup() %>%
    select(all_of(c("local_segment", REQUESTED_EPISODES))) %>%
    unnest(everything(), keep_empty=TRUE) %>%
    select(starts_with(c("local_segment", REQUESTED_FEATURES)))
}
|
|
@ -11,6 +11,10 @@ filter_data_by_segment <- function(data, time_segment){
|
|||
mutate(local_segment = str_extract(assigned_segments, paste0("\\[", time_segment, "#", datetime_regex, ",", datetime_regex, ";", timestamp_regex, ",", timestamp_regex, "\\]"))) %>%
|
||||
extract(local_segment, into = c("local_segment", "timestamps_segment"), paste0("\\[(", time_segment, "#", datetime_regex, ",", datetime_regex, ");(", timestamp_regex, ",", timestamp_regex, ")\\]")) %>%
|
||||
select(-assigned_segments)
|
||||
|
||||
# chunk episodes
|
||||
if (nrow(data) > 0 && all(c("start_timestamp","end_timestamp") %in% colnames(data)) )
|
||||
data <- chunk_episodes(data)
|
||||
return(data)
|
||||
}
|
||||
|
||||
|
|
|
@ -1125,6 +1125,47 @@ properties:
|
|||
INCLUDE_ZERO_STEP_ROWS:
|
||||
type: boolean
|
||||
|
||||
|
||||
FITBIT_CALORIES_INTRADAY:
|
||||
type: object
|
||||
required: [CONTAINER, PROVIDERS]
|
||||
properties:
|
||||
CONTAINER:
|
||||
type: string
|
||||
PROVIDERS:
|
||||
type: ["null", object]
|
||||
properties:
|
||||
RAPIDS:
|
||||
allOf:
|
||||
- $ref: "#/definitions/PROVIDER"
|
||||
- properties:
|
||||
FEATURES:
|
||||
uniqueItems: True
|
||||
items:
|
||||
type: string
|
||||
enum: [count, sumduration, avgduration, minduration, maxduration, stdduration, starttimefirst, endtimefirst, starttimelast, endtimelast, starttimelongest, endtimelongest, summet, avgmet, maxmet, minmet, stdmet, sumcalories, avgcalories, maxcalories, mincalories, stdcalories]
|
||||
EPISODE_TYPE:
|
||||
uniqueItems: True
|
||||
items:
|
||||
type: string
|
||||
enum: [sedentary, lightlyactive, fairlyactive, veryactive, mvpa, lowmet, highmet]
|
||||
EPISODE_TIME_THRESHOLD:
|
||||
type: integer
|
||||
minimum: 1
|
||||
EPISODE_MET_THRESHOLD:
|
||||
type: integer
|
||||
minimum: 1
|
||||
EPISODE_MVPA_CATEGORIES:
|
||||
uniqueItems: True
|
||||
items:
|
||||
type: string
|
||||
enum: [sedentary, lightlyactive, fairlyactive, veryactive]
|
||||
EPISODE_REFERENCE_TIME:
|
||||
type: string
|
||||
enum: [MIDNIGHT, START_OF_THE_SEGMENT]
|
||||
additionalProperties:
|
||||
$ref: "#/definitions/PROVIDER"
|
||||
|
||||
HISTOGRAM_PHONE_DATA_YIELD:
|
||||
type: object
|
||||
required: [PLOT]
|
||||
|
|
Loading…
Reference in New Issue