Add calories intraday features

pull/131/head
JulioV 2021-04-16 18:02:43 -04:00
parent 20910bf1dc
commit 9c56422529
17 changed files with 351 additions and 3 deletions

View File

@ -217,6 +217,15 @@ for provider in config["PHONE_LOCATIONS"]["PROVIDERS"].keys():
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv") files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
# Schedule the Fitbit calories intraday outputs of every provider flagged with COMPUTE
for provider, provider_settings in config["FITBIT_CALORIES_INTRADAY"]["PROVIDERS"].items():
    if not provider_settings["COMPUTE"]:
        continue
    # Raw pull and datetime-annotated data
    files_to_compute.extend(expand("data/raw/{pid}/fitbit_calories_intraday_raw.csv", pid=config["PIDS"]))
    files_to_compute.extend(expand("data/raw/{pid}/fitbit_calories_intraday_with_datetime.csv", pid=config["PIDS"]))
    # Per-provider interim features; the language comes from the provider's script
    script_language = get_script_language(provider_settings["SRC_SCRIPT"])
    files_to_compute.extend(expand("data/interim/{pid}/fitbit_calories_intraday_features/fitbit_calories_intraday_{language}_{provider_key}.csv", pid=config["PIDS"], language=script_language, provider_key=provider.lower()))
    # Per-participant and merged feature files
    files_to_compute.extend(expand("data/processed/features/{pid}/fitbit_calories_intraday.csv", pid=config["PIDS"]))
    files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
    files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
for provider in config["FITBIT_DATA_YIELD"]["PROVIDERS"].keys(): for provider in config["FITBIT_DATA_YIELD"]["PROVIDERS"].keys():
if config["FITBIT_DATA_YIELD"]["PROVIDERS"][provider]["COMPUTE"]: if config["FITBIT_DATA_YIELD"]["PROVIDERS"][provider]["COMPUTE"]:
files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_intraday_raw.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_intraday_raw.csv", pid=config["PIDS"]))

View File

@ -337,6 +337,19 @@ FITBIT_DATA_STREAMS:
# Sensors ------ # Sensors ------
FITBIT_CALORIES_INTRADAY:
CONTAINER: fitbit_data
PROVIDERS:
RAPIDS:
COMPUTE: False
EPISODE_TYPE: [sedentary, lightlyactive, fairlyactive, veryactive, mvpa, lowmet, highmet]
EPISODE_TIME_THRESHOLD: 5 # minutes
EPISODE_MET_THRESHOLD: 3
EPISODE_MVPA_CATEGORIES: [fairlyactive, veryactive]
EPISODE_REFERENCE_TIME: MIDNIGHT # or START_OF_THE_SEGMENT
FEATURES: [count, sumduration, avgduration, minduration, maxduration, stdduration, starttimefirst, endtimefirst, starttimelast, endtimelast, starttimelongest, endtimelongest, summet, avgmet, maxmet, minmet, stdmet, sumcalories, avgcalories, maxcalories, mincalories, stdcalories]
SRC_SCRIPT: src/features/fitbit_calories_intraday/rapids/main.R
# See https://www.rapids.science/latest/features/fitbit-data-yield/ # See https://www.rapids.science/latest/features/fitbit-data-yield/
FITBIT_DATA_YIELD: FITBIT_DATA_YIELD:
SENSOR: FITBIT_HEARTRATE_INTRADAY SENSOR: FITBIT_HEARTRATE_INTRADAY

View File

@ -0,0 +1,68 @@
# Fitbit Calories Intraday
Sensor parameters description for `[FITBIT_CALORIES_INTRADAY]`:
|Key                              | Description |
|----------------|-----------------------------------------------------------------------------------------------------------------------------------
|`[CONTAINER]`| Container where your calories intraday data is stored, depending on the data stream you are using this can be a database table, a CSV file, etc. |
## RAPIDS provider
!!! info "Available time segments"
- Available for all time segments
!!! info "File Sequence"
```bash
- data/raw/{pid}/fitbit_calories_intraday_raw.csv
- data/raw/{pid}/fitbit_calories_intraday_with_datetime.csv
- data/interim/{pid}/fitbit_calories_intraday_features/fitbit_calories_intraday_{language}_{provider_key}.csv
- data/processed/features/{pid}/fitbit_calories_intraday.csv
```
Parameters description for `[FITBIT_CALORIES_INTRADAY][PROVIDERS][RAPIDS]`:
|Key                                                | Description |
|----------------|-----------------------------------------------------------------------------------------------------------------------------------
|`[COMPUTE]` | Set to `True` to extract `FITBIT_CALORIES_INTRADAY` features from the `RAPIDS` provider|
|`[FEATURES]` | Features to be computed from calories intraday data, see table below |
|`[EPISODE_TYPE]` | RAPIDS will compute features for any episodes in this list. There are seven types of episodes defined as consecutive appearances of a label. Four are based on the activity level labels provided by Fitbit: `sedentary`, `lightly active`, `fairly active`, and `very active`. One is defined by RAPIDS as moderate to vigorous physical activity `MVPA` episodes that are based on all `fairly active`, and `very active` labels. Two are defined by the user based on a threshold that divides low or high MET (metabolic equivalent) episodes. |
|`[EPISODE_TIME_THRESHOLD]` | Any consecutive rows of the same `[EPISODE_TYPE]` will be considered a single episode if the time difference between them is less than or equal to this threshold in minutes|
|`[EPISODE_MET_THRESHOLD]` | Any 1-minute calorie data chunk with a MET value equal to or higher than this threshold will be considered a high MET episode and low MET otherwise. The default value is 3|
|`[EPISODE_MVPA_CATEGORIES]` | The Fitbit level labels that are considered part of a moderate to vigorous physical activity episode. One or more of `sedentary`, `lightlyactive`, `fairlyactive`, and `veryactive`. The defaults are `fairlyactive` and `veryactive`|
|`[EPISODE_REFERENCE_TIME]` | Reference time for the start/end time features. `MIDNIGHT` sets the reference time to 00:00 of each day, `START_OF_THE_SEGMENT` sets the reference time to the start of the time segment (useful when a segment is shorter than a day or spans multiple days)|
Features description for `[FITBIT_CALORIES_INTRADAY][PROVIDERS][RAPIDS]`:
|Feature                                                            |Units |Description|
|-------------------------- |---------- |---------------------------|
|starttimefirstepisode`EPISODE_TYPE` |minutes |Start time of the first episode of type `[EPISODE_TYPE]`
|endtimefirstepisode`EPISODE_TYPE` |minutes |End time of the first episode of type `[EPISODE_TYPE]`
|starttimelastepisode`EPISODE_TYPE` |minutes |Start time of the last episode of type `[EPISODE_TYPE]`
|endtimelastepisode`EPISODE_TYPE` |minutes |End time of the last episode of type `[EPISODE_TYPE]`
|starttimelongestepisode`EPISODE_TYPE` |minutes |Start time of the longest episode of type `[EPISODE_TYPE]`
|endtimelongestepisode`EPISODE_TYPE` |minutes |End time of the longest episode of type `[EPISODE_TYPE]`
|countepisode`EPISODE_TYPE` |episodes |The number of episodes of type `[EPISODE_TYPE]`
|sumdurationepisode`EPISODE_TYPE` |minutes |The sum of the duration of episodes of type `[EPISODE_TYPE]`
|avgdurationepisode`EPISODE_TYPE` |minutes |The average of the duration of episodes of type `[EPISODE_TYPE]`
|maxdurationepisode`EPISODE_TYPE` |minutes |The maximum of the duration of episodes of type `[EPISODE_TYPE]`
|mindurationepisode`EPISODE_TYPE` |minutes |The minimum of the duration of episodes of type `[EPISODE_TYPE]`
|stddurationepisode`EPISODE_TYPE` |minutes |The standard deviation of the duration of episodes of type `[EPISODE_TYPE]`
|summetsepisode`EPISODE_TYPE` |METs |The sum of all METs during episodes of type `[EPISODE_TYPE]`
|avgmetsepisode`EPISODE_TYPE` |METs |The average of all METs during episodes of type `[EPISODE_TYPE]`
|maxmetsepisode`EPISODE_TYPE` |METs |The maximum of all METs during episodes of type `[EPISODE_TYPE]`
|minmetsepisode`EPISODE_TYPE` |METs |The minimum of all METs during episodes of type `[EPISODE_TYPE]`
|stdmetsepisode`EPISODE_TYPE` |METs |The standard deviation of all METs during episodes of type `[EPISODE_TYPE]`
|sumcaloriesepisode`EPISODE_TYPE` |calories |The sum of all calories during episodes of type `[EPISODE_TYPE]`
|avgcaloriesepisode`EPISODE_TYPE` |calories |The average of all calories during episodes of type `[EPISODE_TYPE]`
|maxcaloriesepisode`EPISODE_TYPE` |calories |The maximum of all calories during episodes of type `[EPISODE_TYPE]`
|mincaloriesepisode`EPISODE_TYPE` |calories |The minimum of all calories during episodes of type `[EPISODE_TYPE]`
|stdcaloriesepisode`EPISODE_TYPE` |calories |The standard deviation of all calories during episodes of type `[EPISODE_TYPE]`
!!! note "Assumptions/Observations"
- These features are based on intraday calories data that is usually obtained in 1-minute chunks from Fitbit's API.
- The MET value returned by Fitbit is divided by 10.
- Take into account that the [intraday data returned by Fitbit](https://dev.fitbit.com/build/reference/web-api/activity/#get-activity-intraday-time-series) can contain time series for calories burned inclusive of BMR, tracked activity, and manually logged activities.

View File

@ -119,6 +119,7 @@ nav:
- Phone WiFI Connected: features/phone-wifi-connected.md - Phone WiFI Connected: features/phone-wifi-connected.md
- Phone WiFI Visible: features/phone-wifi-visible.md - Phone WiFI Visible: features/phone-wifi-visible.md
- Fitbit: - Fitbit:
- Fitbit Calories Intraday: features/fitbit-calories-intraday.md
- Fitbit Data Yield: features/fitbit-data-yield.md - Fitbit Data Yield: features/fitbit-data-yield.md
- Fitbit Heart Rate Summary: features/fitbit-heartrate-summary.md - Fitbit Heart Rate Summary: features/fitbit-heartrate-summary.md
- Fitbit Heart Rate Intraday: features/fitbit-heartrate-intraday.md - Fitbit Heart Rate Intraday: features/fitbit-heartrate-intraday.md

View File

@ -516,6 +516,32 @@ rule phone_wifi_visible_r_features:
script: script:
"../src/features/entry.R" "../src/features/entry.R"
# Computes fitbit_calories_intraday features for one participant ({pid}) and one
# provider ({provider_key}) through the Python entry script.
# - input: the datetime-annotated calories intraday CSV and the participant's
#   time segment labels.
# - params: the provider's config section (looked up with the upper-cased
#   provider_key wildcard), the provider key itself and the sensor key.
# - output: the interim "python" feature CSV for that provider.
rule fitbit_calories_intraday_python_features:
input:
sensor_data = "data/raw/{pid}/fitbit_calories_intraday_with_datetime.csv",
time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"
params:
provider = lambda wildcards: config["FITBIT_CALORIES_INTRADAY"]["PROVIDERS"][wildcards.provider_key.upper()],
provider_key = "{provider_key}",
sensor_key = "fitbit_calories_intraday"
output:
"data/interim/{pid}/fitbit_calories_intraday_features/fitbit_calories_intraday_python_{provider_key}.csv"
script:
"../src/features/entry.py"
# Computes fitbit_calories_intraday features for one participant ({pid}) and one
# provider ({provider_key}) through the R entry script.
# - input: the datetime-annotated calories intraday CSV and the participant's
#   time segment labels.
# - params: the provider's config section (looked up with the upper-cased
#   provider_key wildcard), the provider key itself and the sensor key.
# - output: the interim "r" feature CSV for that provider.
rule fitbit_calories_intraday_r_features:
input:
sensor_data = "data/raw/{pid}/fitbit_calories_intraday_with_datetime.csv",
time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"
params:
provider = lambda wildcards: config["FITBIT_CALORIES_INTRADAY"]["PROVIDERS"][wildcards.provider_key.upper()],
provider_key = "{provider_key}",
sensor_key = "fitbit_calories_intraday"
output:
"data/interim/{pid}/fitbit_calories_intraday_features/fitbit_calories_intraday_r_{provider_key}.csv"
script:
"../src/features/entry.R"
rule fitbit_data_yield_python_features: rule fitbit_data_yield_python_features:
input: input:
sensor_data = "data/raw/{pid}/fitbit_heartrate_intraday_with_datetime.csv", sensor_data = "data/raw/{pid}/fitbit_heartrate_intraday_with_datetime.csv",

View File

@ -107,7 +107,6 @@ get_participant_most_common_tz <- function(tz_codes_file, participant_file){
return(most_common_tz) return(most_common_tz)
} }
# TODO include CSV timezone file in rule
multiple_time_zone_assignment <- function(sensor_data, timezone_parameters, device_type, pid, participant_file){ multiple_time_zone_assignment <- function(sensor_data, timezone_parameters, device_type, pid, participant_file){
if(nrow(sensor_data) == 0) if(nrow(sensor_data) == 0)
return(sensor_data %>% mutate(local_timezone = NA_character_)) return(sensor_data %>% mutate(local_timezone = NA_character_))

View File

@ -87,3 +87,17 @@ FITBIT_STEPS_INTRADAY:
JSON_FITBIT_COLUMN: fitbit_data # string columnwith JSON objects JSON_FITBIT_COLUMN: fitbit_data # string columnwith JSON objects
SCRIPTS: # List any python or r scripts that mutate your raw data SCRIPTS: # List any python or r scripts that mutate your raw data
- src/data/streams/mutations/fitbit/parse_steps_intraday_json.py - src/data/streams/mutations/fitbit/parse_steps_intraday_json.py
FITBIT_CALORIES_INTRADAY:
RAPIDS_COLUMN_MAPPINGS:
TIMESTAMP: FLAG_TO_MUTATE
DEVICE_ID: device_id
LOCAL_DATE_TIME: FLAG_TO_MUTATE
LEVEL: FLAG_TO_MUTATE
METS: FLAG_TO_MUTATE
VALUE: FLAG_TO_MUTATE
MUTATION:
COLUMN_MAPPINGS:
JSON_FITBIT_COLUMN: fitbit_data # string column with JSON objects
SCRIPTS: # List any python or r scripts that mutate your raw data
- src/data/streams/mutations/fitbit/parse_calories_intraday_json.py

View File

@ -87,3 +87,17 @@ FITBIT_STEPS_INTRADAY:
JSON_FITBIT_COLUMN: fitbit_data # string columnwith JSON objects JSON_FITBIT_COLUMN: fitbit_data # string columnwith JSON objects
SCRIPTS: # List any python or r scripts that mutate your raw data SCRIPTS: # List any python or r scripts that mutate your raw data
- src/data/streams/mutations/fitbit/parse_steps_intraday_json.py - src/data/streams/mutations/fitbit/parse_steps_intraday_json.py
FITBIT_CALORIES_INTRADAY:
RAPIDS_COLUMN_MAPPINGS:
TIMESTAMP: FLAG_TO_MUTATE
DEVICE_ID: device_id
LOCAL_DATE_TIME: FLAG_TO_MUTATE
LEVEL: FLAG_TO_MUTATE
METS: FLAG_TO_MUTATE
VALUE: FLAG_TO_MUTATE
MUTATION:
COLUMN_MAPPINGS:
JSON_FITBIT_COLUMN: fitbit_data # string column with JSON objects
SCRIPTS: # List any python or r scripts that mutate your raw data
- src/data/streams/mutations/fitbit/parse_calories_intraday_json.py

View File

@ -81,3 +81,16 @@ FITBIT_STEPS_INTRADAY:
COLUMN_MAPPINGS: COLUMN_MAPPINGS:
SCRIPTS: # List any python or r scripts that mutate your raw data SCRIPTS: # List any python or r scripts that mutate your raw data
- src/data/streams/mutations/fitbit/add_zero_timestamp.py - src/data/streams/mutations/fitbit/add_zero_timestamp.py
FITBIT_CALORIES_INTRADAY:
RAPIDS_COLUMN_MAPPINGS:
TIMESTAMP: FLAG_TO_MUTATE
DEVICE_ID: device_id
LOCAL_DATE_TIME: local_date_time
LEVEL: level
METS: mets
VALUE: value
MUTATION:
COLUMN_MAPPINGS:
SCRIPTS: # List any python or r scripts that mutate your raw data
- src/data/streams/mutations/fitbit/add_zero_timestamp.py

View File

@ -81,3 +81,16 @@ FITBIT_STEPS_INTRADAY:
COLUMN_MAPPINGS: COLUMN_MAPPINGS:
SCRIPTS: # List any python or r scripts that mutate your raw data SCRIPTS: # List any python or r scripts that mutate your raw data
- src/data/streams/mutations/fitbit/add_zero_timestamp.py - src/data/streams/mutations/fitbit/add_zero_timestamp.py
FITBIT_CALORIES_INTRADAY:
RAPIDS_COLUMN_MAPPINGS:
TIMESTAMP: FLAG_TO_MUTATE
DEVICE_ID: device_id
LOCAL_DATE_TIME: local_date_time
LEVEL: level
METS: mets
VALUE: value
MUTATION:
COLUMN_MAPPINGS:
SCRIPTS: # List any python or r scripts that mutate your raw data
- src/data/streams/mutations/fitbit/add_zero_timestamp.py

View File

@ -0,0 +1,33 @@
import json
import pandas as pd
from datetime import datetime
# Column order of the parsed calories intraday table
CALORIES_INTRADAY_COLUMNS = ("device_id", "level", "mets", "value", "local_date_time", "timestamp")


def parseCaloriesData(calories_data):
    """Flatten Fitbit calories JSON records into one row per intraday data point.

    Parameters
    ----------
    calories_data : pandas.DataFrame
        Must provide a ``device_id`` column and a ``json_fitbit_column`` column
        holding one JSON document (as text) per row.

    Returns
    -------
    pandas.DataFrame
        Columns follow ``CALORIES_INTRADAY_COLUMNS``. ``local_date_time`` combines the
        day found in ``activities-calories`` with each data point's ``time``;
        ``timestamp`` is filled with 0. Records that lack either the
        ``activities-calories`` or the ``activities-calories-intraday`` key, or whose
        ``activities-calories`` list is empty, contribute no rows.
    """
    if calories_data.empty:
        return pd.DataFrame(columns=CALORIES_INTRADAY_COLUMNS)

    # Every row is labelled with the device id of the first input row
    device_id = calories_data["device_id"].iloc[0]
    records_intraday = []

    for raw_record in calories_data.json_fitbit_column:
        record = json.loads(raw_record)  # Parse text into JSON
        if "activities-calories" not in record or "activities-calories-intraday" not in record:
            continue
        daily_summary = record["activities-calories"]
        if not daily_summary:
            # Without a daily summary entry there is no date to attach to the
            # intraday times; skip instead of failing with an IndexError
            continue
        curr_date = datetime.strptime(daily_summary[0]["dateTime"], "%Y-%m-%d")
        for data in record["activities-calories-intraday"]["dataset"]:
            d_time = datetime.strptime(data["time"], '%H:%M:%S').time()
            d_datetime = datetime.combine(curr_date, d_time)
            records_intraday.append((device_id, data["level"], data["mets"], data["value"], d_datetime, 0))

    return pd.DataFrame(data=records_intraday, columns=CALORIES_INTRADAY_COLUMNS)
def main(json_raw, stream_parameters):
    """Parse raw Fitbit calories JSON rows into the calories intraday table.

    ``json_raw`` is handed to ``parseCaloriesData``; ``stream_parameters`` is accepted
    but not used here. The ``mets`` column is divided by 10 and datetime-typed
    ``local_date_time`` values are rendered as ``YYYY-MM-DD HH:MM:SS`` strings.
    """
    calories = parseCaloriesData(json_raw)

    # Placeholder only: the real timestamp is added at readable_datetime.R because
    # multiple timezones need to be taken into account
    calories["timestamp"] = 0
    calories["mets"] = calories["mets"] / 10

    local_times = calories['local_date_time']
    if pd.api.types.is_datetime64_any_dtype(local_times):
        calories['local_date_time'] = local_times.dt.strftime('%Y-%m-%d %H:%M:%S')

    return calories

View File

@ -154,7 +154,7 @@ pull_phone_data <- function(){
infer_device_os_container <- container_functions$infer_device_os infer_device_os_container <- container_functions$infer_device_os
pull_data_container <- container_functions$pull_data pull_data_container <- container_functions$pull_data
for(idx in seq_along(devices)){ #TODO remove length for(idx in seq_along(devices)){
device <- devices[idx] device <- devices[idx]
message(paste0("\nProcessing ", sensor, " for ", device)) message(paste0("\nProcessing ", sensor, " for ", device))

View File

@ -115,7 +115,7 @@ pull_wearable_data_main <- function(){
pull_data_container <- load_container_script(stream_container) pull_data_container <- load_container_script(stream_container)
for(idx in seq_along(devices)){ #TODO remove length for(idx in seq_along(devices)){
device <- devices[idx] device <- devices[idx]
message(paste0("\nProcessing ", sensor, " for ", device)) message(paste0("\nProcessing ", sensor, " for ", device))

View File

@ -181,6 +181,14 @@ FITBIT_STEPS_INTRADAY:
- LOCAL_DATE_TIME - LOCAL_DATE_TIME
- STEPS - STEPS
FITBIT_CALORIES_INTRADAY:
- TIMESTAMP
- DEVICE_ID
- LOCAL_DATE_TIME
- LEVEL
- METS
- VALUE
EMPATICA_ACCELEROMETER: EMPATICA_ACCELEROMETER:
- TIMESTAMP - TIMESTAMP
- DEVICE_ID - DEVICE_ID

View File

@ -0,0 +1,92 @@
source("renv/activate.R")
library(tidyverse)
library(lubridate)
library(glue)
#' Build a zero-row table with the feature columns of one episode type.
#'
#' Used when there is no data for an episode type. The statistic columns use the same
#' `sum`/`avg`/`min`/`max`/`std` prefixes that episode_type_features() emits, so an
#' empty result exposes the same column names as a populated one.
#'
#' @param episode_type Episode label appended to every column name (e.g. "sedentary").
#' @return A tibble with zero rows: integer count/start/end time columns followed by
#'   numeric duration, calories and mets statistic columns.
create_empty_dataframe <- function(episode_type){
  integer_features <- c("countepisode", "starttimefirstepisode", "endtimefirstepisode",
                        "starttimelastepisode", "endtimelastepisode",
                        "starttimelongestepisode", "endtimelongestepisode")
  integer_columns <- paste0(integer_features, episode_type)

  # expand.grid varies its first factor fastest: all statistics of duration first,
  # then calories, then mets
  stat_grid <- expand.grid(fun = c("sum", "avg", "min", "max", "std"),
                           col = c("duration", "calories", "mets"),
                           stringsAsFactors = FALSE)
  double_columns <- paste0(stat_grid$fun, stat_grid$col, "episode", episode_type)

  as_tibble(c(setNames(lapply(integer_columns, function(x) integer()), integer_columns),
              setNames(lapply(double_columns, function(x) numeric()), double_columns)))
}
#' Pick the `time` value that belongs to the longest duration.
#'
#' @param duration Numeric vector of durations.
#' @param time Vector aligned with `duration`.
#' @return The element of `time` at the first position where `duration` is maximal.
longest <- function(duration, time){
  is_longest <- duration == max(duration)
  # Ties are resolved in favour of the earliest position
  first_longest <- min(which(is_longest))
  time[first_longest]
}
#' Summarise the episodes of one type into a single row of features.
#'
#' @param data Rows of one episode type; the code reads the columns `timestamp`, `mets`,
#'   `value`, `time_since_ref` and the column named by `episode_id_column`.
#' @param episode_type Label appended to every feature name.
#' @param episode_id_column Name of the column that identifies each episode.
#' @return A one-row table of features, or the zero-row table from
#'   create_empty_dataframe() when `data` has no rows.
episode_type_features <- function(data, episode_type, episode_id_column){
  if(nrow(data) == 0)
    return(create_empty_dataframe(episode_type))

  # Statistics computed across episodes; the list names become the feature prefixes
  episode_stats <- list(sum = sum, avg = mean, min = min, max = max, std = sd)

  # One row per episode
  episodes <- data %>%
    group_by(across(all_of(episode_id_column))) %>%
    summarise(duration = (max(timestamp) - min(timestamp)) / 60000 + 1,
              mets = sum(mets),
              calories = sum(value),
              start_time = min(time_since_ref),
              end_time = max(time_since_ref) + 1)

  # Collapse all episodes into a single row of features
  episodes %>%
    summarise("countepisode{episode_type}" := n(),
              "starttimefirstepisode{episode_type}" := first(start_time),
              "endtimefirstepisode{episode_type}" := first(end_time),
              "starttimelastepisode{episode_type}" := last(start_time),
              "endtimelastepisode{episode_type}" := last(end_time),
              "starttimelongestepisode{episode_type}" := longest(duration, start_time),
              "endtimelongestepisode{episode_type}" := longest(duration, end_time),
              across(c(duration, calories, mets), episode_stats, .names = "{.fn}{.col}episode{episode_type}"))
}
#' Extract the RAPIDS Fitbit calories intraday features for one time segment.
#'
#' Rows are chained into episodes. A new episode starts when the gap to the previous
#' row is larger than EPISODE_TIME_THRESHOLD minutes, or when the Fitbit level, the MVPA
#' membership or the side of the MET threshold changes (one episode id per criterion).
#'
#' @param sensor_data_files Named list of input files; the CSV stored under
#'   `sensor_data` is read.
#' @param time_segment Time segment label handed to filter_data_by_segment().
#' @param provider Provider settings; reads EPISODE_MET_THRESHOLD,
#'   EPISODE_MVPA_CATEGORIES, EPISODE_TIME_THRESHOLD, EPISODE_REFERENCE_TIME,
#'   EPISODE_TYPE and FEATURES.
#' @return A table with `local_segment` plus every column of the requested episode
#'   types whose name starts with one of the requested features.
rapids_features <- function(sensor_data_files, time_segment, provider){
  # Read from the files handed to this function rather than the global snakemake object
  calories <- read_csv(sensor_data_files[["sensor_data"]],
                       col_types = cols_only(level="i", mets="d", value="d", local_date_time="T", assigned_segments="c", timestamp="d"))

  MET_THRESHOLD <- provider[["EPISODE_MET_THRESHOLD"]]
  MVPA_LABELS <- provider[["EPISODE_MVPA_CATEGORIES"]]
  FITBIT_LEVELS <- c("sedentary", "lightlyactive", "fairlyactive", "veryactive")
  # Position in FITBIT_LEVELS minus one gives the numeric level (0 to 3) of each label
  MVPA_LEVELS <- which(FITBIT_LEVELS %in% MVPA_LABELS) - 1
  EPISODE_TIME_THRESHOLD <- provider[["EPISODE_TIME_THRESHOLD"]]
  EPISODE_REFERENCE_TIME <- provider[["EPISODE_REFERENCE_TIME"]]
  REQUESTED_EPISODES <- provider[["EPISODE_TYPE"]]
  REQUESTED_FEATURES <- provider[["FEATURES"]]

  calories <- calories %>% filter_data_by_segment(time_segment)

  # No rows in this segment: return the empty feature columns of the requested episodes
  if(nrow(calories) == 0)
    return(bind_cols(lapply(REQUESTED_EPISODES, function(episode_type) episode_type_features(calories, episode_type, ""))) %>%
             add_column(local_segment = character(), .before = 1) %>%
             select(starts_with(c("local_segment", REQUESTED_FEATURES))))

  calories %>%
    extract(timestamps_segment, regex = "(\\d*),", into = c("segment_start_ts"), remove = TRUE, convert = TRUE) %>%
    arrange(timestamp) %>%
    mutate(consecutive = c(0, diff(timestamp) / 60000), # minutes since the previous row
           level_diff = c(0, diff(level)),
           mvpa_diff = c(1, diff(if_else(level %in% MVPA_LEVELS, 1, 0))),
           met_diff = c(1, diff(if_else(mets >= MET_THRESHOLD, 1, 0))),
           level_episode_id = cumsum(consecutive > EPISODE_TIME_THRESHOLD | level_diff != 0),
           mvpa_episode_id = cumsum(consecutive > EPISODE_TIME_THRESHOLD | mvpa_diff != 0),
           met_episode_id = cumsum(consecutive > EPISODE_TIME_THRESHOLD | met_diff != 0),
           # Minutes since midnight of the local day, or since the start of the segment
           time_since_ref = case_when(EPISODE_REFERENCE_TIME == "MIDNIGHT" ~ ((hour(local_date_time) * 3600) + (minute(local_date_time) * 60) + second(local_date_time)) / 60,
                                      EPISODE_REFERENCE_TIME == "START_OF_THE_SEGMENT" ~ (timestamp - segment_start_ts) / 60000)
    ) %>%
    select(-consecutive, -level_diff, -mvpa_diff, -met_diff) %>%
    group_by(local_segment) %>%
    nest() %>%
    mutate(sedentary = map(data, ~ episode_type_features(.x %>% filter(level == 0), "sedentary", "level_episode_id")),
           lightlyactive = map(data, ~ episode_type_features(.x %>% filter(level == 1), "lightlyactive", "level_episode_id")),
           fairlyactive = map(data, ~ episode_type_features(.x %>% filter(level == 2), "fairlyactive", "level_episode_id")),
           veryactive = map(data, ~ episode_type_features(.x %>% filter(level == 3), "veryactive", "level_episode_id")),
           # Use the configured MVPA categories, the same set that drives mvpa_episode_id
           mvpa = map(data, ~ episode_type_features(.x %>% filter(level %in% MVPA_LEVELS), "mvpa", "mvpa_episode_id")),
           lowmet = map(data, ~ episode_type_features(.x %>% filter(mets < MET_THRESHOLD), "lowmet", "met_episode_id")),
           highmet = map(data, ~ episode_type_features(.x %>% filter(mets >= MET_THRESHOLD), "highmet", "met_episode_id"))
    ) %>%
    ungroup() %>%
    select(all_of(c("local_segment", REQUESTED_EPISODES))) %>%
    unnest(everything(), keep_empty=TRUE) %>%
    select(starts_with(c("local_segment", REQUESTED_FEATURES)))
}

View File

@ -11,6 +11,10 @@ filter_data_by_segment <- function(data, time_segment){
mutate(local_segment = str_extract(assigned_segments, paste0("\\[", time_segment, "#", datetime_regex, ",", datetime_regex, ";", timestamp_regex, ",", timestamp_regex, "\\]"))) %>% mutate(local_segment = str_extract(assigned_segments, paste0("\\[", time_segment, "#", datetime_regex, ",", datetime_regex, ";", timestamp_regex, ",", timestamp_regex, "\\]"))) %>%
extract(local_segment, into = c("local_segment", "timestamps_segment"), paste0("\\[(", time_segment, "#", datetime_regex, ",", datetime_regex, ");(", timestamp_regex, ",", timestamp_regex, ")\\]")) %>% extract(local_segment, into = c("local_segment", "timestamps_segment"), paste0("\\[(", time_segment, "#", datetime_regex, ",", datetime_regex, ");(", timestamp_regex, ",", timestamp_regex, ")\\]")) %>%
select(-assigned_segments) select(-assigned_segments)
# chunk episodes
if (nrow(data) > 0 && all(c("start_timestamp","end_timestamp") %in% colnames(data)) )
data <- chunk_episodes(data)
return(data) return(data)
} }

View File

@ -1125,6 +1125,47 @@ properties:
INCLUDE_ZERO_STEP_ROWS: INCLUDE_ZERO_STEP_ROWS:
type: boolean type: boolean
FITBIT_CALORIES_INTRADAY:
type: object
required: [CONTAINER, PROVIDERS]
properties:
CONTAINER:
type: string
PROVIDERS:
type: ["null", object]
properties:
RAPIDS:
allOf:
- $ref: "#/definitions/PROVIDER"
- properties:
FEATURES:
uniqueItems: True
items:
type: string
enum: [count, sumduration, avgduration, minduration, maxduration, stdduration, starttimefirst, endtimefirst, starttimelast, endtimelast, starttimelongest, endtimelongest, summet, avgmet, maxmet, minmet, stdmet, sumcalories, avgcalories, maxcalories, mincalories, stdcalories]
EPISODE_TYPE:
uniqueItems: True
items:
type: string
enum: [sedentary, lightlyactive, fairlyactive, veryactive, mvpa, lowmet, highmet]
EPISODE_TIME_THRESHOLD:
type: integer
minimum: 1
EPISODE_MET_THRESHOLD:
type: integer
minimum: 1
EPISODE_MVPA_CATEGORIES:
uniqueItems: True
items:
type: string
enum: [sedentary, lightlyactive, fairlyactive, veryactive]
EPISODE_REFERENCE_TIME:
type: string
enum: [MIDNIGHT, START_OF_THE_SEGMENT]
additionalProperties:
$ref: "#/definitions/PROVIDER"
HISTOGRAM_PHONE_DATA_YIELD: HISTOGRAM_PHONE_DATA_YIELD:
type: object type: object
required: [PLOT] required: [PLOT]