diff --git a/Snakefile b/Snakefile index 71a6fc5c..1491240a 100644 --- a/Snakefile +++ b/Snakefile @@ -217,6 +217,15 @@ for provider in config["PHONE_LOCATIONS"]["PROVIDERS"].keys(): files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"])) files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv") +for provider in config["FITBIT_CALORIES_INTRADAY"]["PROVIDERS"].keys(): + if config["FITBIT_CALORIES_INTRADAY"]["PROVIDERS"][provider]["COMPUTE"]: + files_to_compute.extend(expand("data/raw/{pid}/fitbit_calories_intraday_raw.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/raw/{pid}/fitbit_calories_intraday_with_datetime.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/interim/{pid}/fitbit_calories_intraday_features/fitbit_calories_intraday_{language}_{provider_key}.csv", pid=config["PIDS"], language=get_script_language(config["FITBIT_CALORIES_INTRADAY"]["PROVIDERS"][provider]["SRC_SCRIPT"]), provider_key=provider.lower())) + files_to_compute.extend(expand("data/processed/features/{pid}/fitbit_calories_intraday.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"])) + files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv") + for provider in config["FITBIT_DATA_YIELD"]["PROVIDERS"].keys(): if config["FITBIT_DATA_YIELD"]["PROVIDERS"][provider]["COMPUTE"]: files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_intraday_raw.csv", pid=config["PIDS"])) diff --git a/config.yaml b/config.yaml index f2997bca..61d5b939 100644 --- a/config.yaml +++ b/config.yaml @@ -337,6 +337,19 @@ FITBIT_DATA_STREAMS: # Sensors ------ +FITBIT_CALORIES_INTRADAY: + CONTAINER: fitbit_data + PROVIDERS: + RAPIDS: + COMPUTE: False + EPISODE_TYPE: [sedentary, lightlyactive, fairlyactive, veryactive, mvpa, lowmet, highmet] + EPISODE_TIME_THRESHOLD: 5 # minutes + 
EPISODE_MET_THRESHOLD: 3 + EPISODE_MVPA_CATEGORIES: [fairlyactive, veryactive] + EPISODE_REFERENCE_TIME: MIDNIGHT # or START_OF_THE_SEGMENT + FEATURES: [count, sumduration, avgduration, minduration, maxduration, stdduration, starttimefirst, endtimefirst, starttimelast, endtimelast, starttimelongest, endtimelongest, summet, avgmet, maxmet, minmet, stdmet, sumcalories, avgcalories, maxcalories, mincalories, stdcalories] + SRC_SCRIPT: src/features/fitbit_calories_intraday/rapids/main.R + # See https://www.rapids.science/latest/features/fitbit-data-yield/ FITBIT_DATA_YIELD: SENSOR: FITBIT_HEARTRATE_INTRADAY diff --git a/docs/features/fitbit-calories-intraday.md b/docs/features/fitbit-calories-intraday.md new file mode 100644 index 00000000..e6a6a6eb --- /dev/null +++ b/docs/features/fitbit-calories-intraday.md @@ -0,0 +1,68 @@ +# Fitbit Calories Intraday + +Sensor parameters description for `[FITBIT_CALORIES_INTRADAY]`: + +|Key                              | Description | +|----------------|----------------------------------------------------------------------------------------------------------------------------------- +|`[CONTAINER]`| Container where your calories intraday data is stored, depending on the data stream you are using this can be a database table, a CSV file, etc. | + + +## RAPIDS provider + +!!! info "Available time segments" + - Available for all time segments + +!!! 
info "File Sequence" + ```bash + - data/raw/{pid}/fitbit_calories_intraday_raw.csv + - data/raw/{pid}/fitbit_calories_intraday_with_datetime.csv + - data/interim/{pid}/fitbit_calories_intraday_features/fitbit_calories_intraday_{language}_{provider_key}.csv + - data/processed/features/{pid}/fitbit_calories_intraday.csv + ``` + + +Parameters description for `[FITBIT_CALORIES_INTRADAY][PROVIDERS][RAPIDS]`: + +|Key                                                | Description | +|----------------|----------------------------------------------------------------------------------------------------------------------------------- +|`[COMPUTE]` | Set to `True` to extract `FITBIT_CALORIES_INTRADAY` features from the `RAPIDS` provider| +|`[FEATURES]` | Features to be computed from calories intraday data, see table below | +|`[EPISODE_TYPE]` | RAPIDS will compute features for any episodes in this list. There are seven types of episodes defined as consecutive appearances of a label. Four are based on the activity level labels provided by Fitbit: `sedentary`, `lightly active`, `fairly active`, and `very active`. One is defined by RAPIDS as moderate to vigorous physical activity `MVPA` episodes that are based on all `fairly active` and `very active` labels. Two are defined by the user based on a threshold that divides low or high MET (metabolic equivalent) episodes. | +|`[EPISODE_TIME_THRESHOLD]` | Any consecutive rows of the same `[EPISODE_TYPE]` will be considered a single episode if the time difference between them is less than or equal to this threshold in minutes| +|`[EPISODE_MET_THRESHOLD]` | Any 1-minute calorie data chunk with a MET value equal to or higher than this threshold will be considered a high MET episode and low MET otherwise. The default value is 3| +|`[EPISODE_MVPA_CATEGORIES]` | The Fitbit level labels that are considered part of a moderate to vigorous physical activity episode. One or more of `sedentary`, `lightly active`, `fairly active`, and `very active`. 
The default are `fairly active` and `very active`| +|`[EPISODE_REFERENCE_TIME]` | Reference time for the start/end time features. `MIDNIGHT` sets the reference time to 00:00 of each day, `START_OF_THE_SEGMENT` sets the reference time to the start of the time segment (useful when a segment is shorter than a day or spans multiple days)| + + +Features description for `[FITBIT_CALORIES_INTRADAY][PROVIDERS][RAPIDS]`: + +|Feature                                                            |Units |Description| +|-------------------------- |---------- |---------------------------| +|starttimefirstepisode`EPISODE_TYPE` |minutes |Start time of the first episode of type `[EPISODE_TYPE]` +|endtimefirstepisode`EPISODE_TYPE` |minutes |End time of the first episode of type `[EPISODE_TYPE]` +|starttimelastepisode`EPISODE_TYPE` |minutes |Start time of the last episode of type `[EPISODE_TYPE]` +|endtimelastepisode`EPISODE_TYPE` |minutes |End time of the last episode of type `[EPISODE_TYPE]` +|starttimelongestepisode`EPISODE_TYPE` |minutes |Start time of the longest episode of type `[EPISODE_TYPE]` +|endtimelongestepisode`EPISODE_TYPE` |minutes |End time of the longest episode of type `[EPISODE_TYPE]` +|countepisode`EPISODE_TYPE` |episodes |The number of episodes of type `[EPISODE_TYPE]` +|sumdurationepisode`EPISODE_TYPE` |minutes |The sum of the duration of episodes of type `[EPISODE_TYPE]` +|avgdurationepisode`EPISODE_TYPE` |minutes |The average of the duration of episodes of type `[EPISODE_TYPE]` +|maxdurationepisode`EPISODE_TYPE` |minutes |The maximum of the duration of episodes of type `[EPISODE_TYPE]` +|mindurationepisode`EPISODE_TYPE` |minutes |The minimum of the duration of episodes of type `[EPISODE_TYPE]` +|stddurationepisode`EPISODE_TYPE` |minutes |The standard deviation of the duration of episodes of type `[EPISODE_TYPE]` +|summet`EPISODE_TYPE` |METs |The sum of all METs during episodes of type `[EPISODE_TYPE]` +|avgmet`EPISODE_TYPE` |METs |The average of all METs during 
episodes of type `[EPISODE_TYPE]` +|maxmet`EPISODE_TYPE` |METs |The maximum of all METs during episodes of type `[EPISODE_TYPE]` +|minmet`EPISODE_TYPE` |METs |The minimum of all METs during episodes of type `[EPISODE_TYPE]` +|stdmet`EPISODE_TYPE` |METs |The standard deviation of all METs during episodes of type `[EPISODE_TYPE]` +|sumcalories`EPISODE_TYPE` |calories |The sum of all calories during episodes of type `[EPISODE_TYPE]` +|avgcalories`EPISODE_TYPE` |calories |The average of all calories during episodes of type `[EPISODE_TYPE]` +|maxcalories`EPISODE_TYPE` |calories |The maximum of all calories during episodes of type `[EPISODE_TYPE]` +|mincalories`EPISODE_TYPE` |calories |The minimum of all calories during episodes of type `[EPISODE_TYPE]` +|stdcalories`EPISODE_TYPE` |calories |The standard deviation of all calories during episodes of type `[EPISODE_TYPE]` + + +!!! note "Assumptions/Observations" + - These features are based on intraday calories data that is usually obtained in 1-minute chunks from Fitbit's API. + - The MET value returned by Fitbit is divided by 10 + - Take into account that the [intraday data returned by Fitbit](https://dev.fitbit.com/build/reference/web-api/activity/#get-activity-intraday-time-series) can contain time series for calories burned inclusive of BMR, tracked activity, and manually logged activities. 
diff --git a/mkdocs.yml b/mkdocs.yml index 8979122b..fd80747d 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -119,6 +119,7 @@ nav: - Phone WiFI Connected: features/phone-wifi-connected.md - Phone WiFI Visible: features/phone-wifi-visible.md - Fitbit: + - Fitbit Calories Intraday: features/fitbit-calories-intraday.md - Fitbit Data Yield: features/fitbit-data-yield.md - Fitbit Heart Rate Summary: features/fitbit-heartrate-summary.md - Fitbit Heart Rate Intraday: features/fitbit-heartrate-intraday.md diff --git a/rules/features.smk b/rules/features.smk index 7f471968..5a37ceaa 100644 --- a/rules/features.smk +++ b/rules/features.smk @@ -516,6 +516,32 @@ rule phone_wifi_visible_r_features: script: "../src/features/entry.R" +rule fitbit_calories_intraday_python_features: + input: + sensor_data = "data/raw/{pid}/fitbit_calories_intraday_with_datetime.csv", + time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv" + params: + provider = lambda wildcards: config["FITBIT_CALORIES_INTRADAY"]["PROVIDERS"][wildcards.provider_key.upper()], + provider_key = "{provider_key}", + sensor_key = "fitbit_calories_intraday" + output: + "data/interim/{pid}/fitbit_calories_intraday_features/fitbit_calories_intraday_python_{provider_key}.csv" + script: + "../src/features/entry.py" + +rule fitbit_calories_intraday_r_features: + input: + sensor_data = "data/raw/{pid}/fitbit_calories_intraday_with_datetime.csv", + time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv" + params: + provider = lambda wildcards: config["FITBIT_CALORIES_INTRADAY"]["PROVIDERS"][wildcards.provider_key.upper()], + provider_key = "{provider_key}", + sensor_key = "fitbit_calories_intraday" + output: + "data/interim/{pid}/fitbit_calories_intraday_features/fitbit_calories_intraday_r_{provider_key}.csv" + script: + "../src/features/entry.R" + rule fitbit_data_yield_python_features: input: sensor_data = "data/raw/{pid}/fitbit_heartrate_intraday_with_datetime.csv", diff 
--git a/src/data/datetime/assign_to_multiple_timezones.R b/src/data/datetime/assign_to_multiple_timezones.R index f94cface..d1aa2337 100644 --- a/src/data/datetime/assign_to_multiple_timezones.R +++ b/src/data/datetime/assign_to_multiple_timezones.R @@ -107,7 +107,6 @@ get_participant_most_common_tz <- function(tz_codes_file, participant_file){ return(most_common_tz) } -# TODO include CSV timezone file in rule multiple_time_zone_assignment <- function(sensor_data, timezone_parameters, device_type, pid, participant_file){ if(nrow(sensor_data) == 0) return(sensor_data %>% mutate(local_timezone = NA_character_)) diff --git a/src/data/streams/fitbitjson_csv/format.yaml b/src/data/streams/fitbitjson_csv/format.yaml index 16c3b73e..bf3ce7f6 100644 --- a/src/data/streams/fitbitjson_csv/format.yaml +++ b/src/data/streams/fitbitjson_csv/format.yaml @@ -87,3 +87,17 @@ FITBIT_STEPS_INTRADAY: JSON_FITBIT_COLUMN: fitbit_data # string columnwith JSON objects SCRIPTS: # List any python or r scripts that mutate your raw data - src/data/streams/mutations/fitbit/parse_steps_intraday_json.py + +FITBIT_CALORIES_INTRADAY: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: FLAG_TO_MUTATE + DEVICE_ID: device_id + LOCAL_DATE_TIME: FLAG_TO_MUTATE + LEVEL: FLAG_TO_MUTATE + METS: FLAG_TO_MUTATE + VALUE: FLAG_TO_MUTATE + MUTATION: + COLUMN_MAPPINGS: + JSON_FITBIT_COLUMN: fitbit_data # string columnwith JSON objects + SCRIPTS: # List any python or r scripts that mutate your raw data + - src/data/streams/mutations/fitbit/parse_calories_intraday_json.py diff --git a/src/data/streams/fitbitjson_mysql/format.yaml b/src/data/streams/fitbitjson_mysql/format.yaml index 16c3b73e..bf3ce7f6 100644 --- a/src/data/streams/fitbitjson_mysql/format.yaml +++ b/src/data/streams/fitbitjson_mysql/format.yaml @@ -87,3 +87,17 @@ FITBIT_STEPS_INTRADAY: JSON_FITBIT_COLUMN: fitbit_data # string columnwith JSON objects SCRIPTS: # List any python or r scripts that mutate your raw data - 
src/data/streams/mutations/fitbit/parse_steps_intraday_json.py + +FITBIT_CALORIES_INTRADAY: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: FLAG_TO_MUTATE + DEVICE_ID: device_id + LOCAL_DATE_TIME: FLAG_TO_MUTATE + LEVEL: FLAG_TO_MUTATE + METS: FLAG_TO_MUTATE + VALUE: FLAG_TO_MUTATE + MUTATION: + COLUMN_MAPPINGS: + JSON_FITBIT_COLUMN: fitbit_data # string columnwith JSON objects + SCRIPTS: # List any python or r scripts that mutate your raw data + - src/data/streams/mutations/fitbit/parse_calories_intraday_json.py diff --git a/src/data/streams/fitbitparsed_csv/format.yaml b/src/data/streams/fitbitparsed_csv/format.yaml index ecaaab11..2ca1edab 100644 --- a/src/data/streams/fitbitparsed_csv/format.yaml +++ b/src/data/streams/fitbitparsed_csv/format.yaml @@ -81,3 +81,16 @@ FITBIT_STEPS_INTRADAY: COLUMN_MAPPINGS: SCRIPTS: # List any python or r scripts that mutate your raw data - src/data/streams/mutations/fitbit/add_zero_timestamp.py + +FITBIT_CALORIES_INTRADAY: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: FLAG_TO_MUTATE + DEVICE_ID: device_id + LOCAL_DATE_TIME: local_date_time + LEVEL: level + METS: mets + VALUE: value + MUTATION: + COLUMN_MAPPINGS: + SCRIPTS: # List any python or r scripts that mutate your raw data + - src/data/streams/mutations/fitbit/add_zero_timestamp.py diff --git a/src/data/streams/fitbitparsed_mysql/format.yaml b/src/data/streams/fitbitparsed_mysql/format.yaml index ecaaab11..2ca1edab 100644 --- a/src/data/streams/fitbitparsed_mysql/format.yaml +++ b/src/data/streams/fitbitparsed_mysql/format.yaml @@ -81,3 +81,16 @@ FITBIT_STEPS_INTRADAY: COLUMN_MAPPINGS: SCRIPTS: # List any python or r scripts that mutate your raw data - src/data/streams/mutations/fitbit/add_zero_timestamp.py + +FITBIT_CALORIES_INTRADAY: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: FLAG_TO_MUTATE + DEVICE_ID: device_id + LOCAL_DATE_TIME: local_date_time + LEVEL: level + METS: mets + VALUE: value + MUTATION: + COLUMN_MAPPINGS: + SCRIPTS: # List any python or r scripts that mutate your raw data + - 
src/data/streams/mutations/fitbit/add_zero_timestamp.py
diff --git a/src/data/streams/mutations/fitbit/parse_calories_intraday_json.py b/src/data/streams/mutations/fitbit/parse_calories_intraday_json.py
new file mode 100644
index 00000000..874b0624
--- /dev/null
+++ b/src/data/streams/mutations/fitbit/parse_calories_intraday_json.py
@@ -0,0 +1,43 @@
+import json
+import pandas as pd
+from datetime import datetime
+
+CALORIES_INTRADAY_COLUMNS = ("device_id", "level", "mets", "value", "local_date_time", "timestamp")
+
+def parseCaloriesData(calories_data):
+    """Flatten Fitbit calories JSON payloads into one row per intraday entry.
+
+    Each value of ``calories_data.json_fitbit_column`` is parsed as JSON; payloads
+    missing either "activities-calories" or "activities-calories-intraday" are skipped.
+    Returns a DataFrame with CALORIES_INTRADAY_COLUMNS; ``timestamp`` is a 0 placeholder
+    and ``device_id`` is taken from the first input row.
+    """
+    if calories_data.empty:
+        return pd.DataFrame(columns=CALORIES_INTRADAY_COLUMNS)
+
+    device_id = calories_data["device_id"].iloc[0]
+    rows = []
+    for raw_json in calories_data.json_fitbit_column:
+        payload = json.loads(raw_json)
+        if "activities-calories" not in payload or "activities-calories-intraday" not in payload:
+            continue
+        # The daily summary entry carries the date; intraday entries only carry a time of day
+        day = datetime.strptime(payload["activities-calories"][0]["dateTime"], "%Y-%m-%d")
+        for entry in payload["activities-calories-intraday"]["dataset"]:
+            time_of_day = datetime.strptime(entry["time"], "%H:%M:%S").time()
+            rows.append((device_id, entry["level"], entry["mets"], entry["value"], datetime.combine(day, time_of_day), 0))
+
+    return pd.DataFrame(data=rows, columns=CALORIES_INTRADAY_COLUMNS)
+
+def main(json_raw, stream_parameters):
+    """Parse the raw JSON rows and return them in the calories intraday column layout.
+
+    ``stream_parameters`` is accepted but not used here.
+    """
+    calories = parseCaloriesData(json_raw)
+    calories["timestamp"] = 0  # this column is added at readable_datetime.R because we need to take into account multiple timezones
+    calories["mets"] = calories["mets"] / 10  # rescale the raw Fitbit MET value by 1/10
+    local_date_time = calories["local_date_time"]
+    if pd.api.types.is_datetime64_any_dtype(local_date_time):
+        calories["local_date_time"] = local_date_time.dt.strftime("%Y-%m-%d %H:%M:%S")
+    return calories
diff --git a/src/data/streams/pull_phone_data.R b/src/data/streams/pull_phone_data.R index
a0e4e036..6ef589cf 100644 --- a/src/data/streams/pull_phone_data.R +++ b/src/data/streams/pull_phone_data.R @@ -154,7 +154,7 @@ pull_phone_data <- function(){ infer_device_os_container <- container_functions$infer_device_os pull_data_container <- container_functions$pull_data - for(idx in seq_along(devices)){ #TODO remove length + for(idx in seq_along(devices)){ device <- devices[idx] message(paste0("\nProcessing ", sensor, " for ", device)) diff --git a/src/data/streams/pull_wearable_data.R b/src/data/streams/pull_wearable_data.R index 247e0147..dcc43c8f 100644 --- a/src/data/streams/pull_wearable_data.R +++ b/src/data/streams/pull_wearable_data.R @@ -115,7 +115,7 @@ pull_wearable_data_main <- function(){ pull_data_container <- load_container_script(stream_container) - for(idx in seq_along(devices)){ #TODO remove length + for(idx in seq_along(devices)){ device <- devices[idx] message(paste0("\nProcessing ", sensor, " for ", device)) diff --git a/src/data/streams/rapids_columns.yaml b/src/data/streams/rapids_columns.yaml index d6e5ac36..e1aff997 100644 --- a/src/data/streams/rapids_columns.yaml +++ b/src/data/streams/rapids_columns.yaml @@ -181,6 +181,14 @@ FITBIT_STEPS_INTRADAY: - LOCAL_DATE_TIME - STEPS +FITBIT_CALORIES_INTRADAY: + - TIMESTAMP + - DEVICE_ID + - LOCAL_DATE_TIME + - LEVEL + - METS + - VALUE + EMPATICA_ACCELEROMETER: - TIMESTAMP - DEVICE_ID
diff --git a/src/features/fitbit_calories_intraday/rapids/main.R b/src/features/fitbit_calories_intraday/rapids/main.R
new file mode 100644
index 00000000..18396220
--- /dev/null
+++ b/src/features/fitbit_calories_intraday/rapids/main.R
@@ -0,0 +1,116 @@
+source("renv/activate.R")
+library(tidyverse)
+library(lubridate)
+library(glue)
+
+# Statistics computed over the per-episode measures. The names are the prefixes of the output
+# columns, so the empty and the non-empty code paths must both derive their names from here.
+EPISODE_SUMMARY_FUNCTIONS <- list(sum = sum, avg = mean, min = min, max = max, std = sd)
+EPISODE_SUMMARY_COLUMNS <- c("duration", "calories", "mets")
+
+# Zero-row tibble with exactly the columns episode_type_features() produces for `episode_type`,
+# so that segments or participants without episodes still yield a consistent set of columns.
+create_empty_dataframe <- function(episode_type){
+    integer_columns <- paste0(c("countepisode", "starttimefirstepisode", "endtimefirstepisode",
+                                "starttimelastepisode", "endtimelastepisode",
+                                "starttimelongestepisode", "endtimelongestepisode"), episode_type)
+    # expand.grid varies its first factor fastest: sum, avg, min, max, std for duration, then calories, then mets
+    statistics <- expand.grid(fun = names(EPISODE_SUMMARY_FUNCTIONS), col = EPISODE_SUMMARY_COLUMNS, stringsAsFactors = FALSE)
+    double_columns <- paste0(statistics$fun, statistics$col, "episode", episode_type)
+
+    as_tibble(c(setNames(rep(list(integer()), length(integer_columns)), integer_columns),
+                setNames(rep(list(numeric()), length(double_columns)), double_columns)))
+}
+
+# Value of `time` at the position of the largest `duration` (the first one when there are ties)
+longest <- function(duration, time){
+    time[which.max(duration)]
+}
+
+# Features of one episode type within one time segment.
+#   data: rows already filtered to the episode type; needs timestamp, mets, value and time_since_ref
+#   episode_type: label appended to every feature name
+#   episode_id_column: name of the column that identifies each episode
+# Returns a one-row tibble, or a zero-row tibble with the same columns when `data` is empty.
+episode_type_features <- function(data, episode_type, episode_id_column){
+    if(nrow(data) == 0)
+        return(create_empty_dataframe(episode_type))
+
+    data %>%
+        group_by(across(all_of(episode_id_column))) %>%
+        # One row per episode; timestamps are divided by 60000 to get minutes. The + 1 counts the
+        # last row of the episode as a whole minute (assumes 1-minute rows - TODO confirm)
+        summarise(duration = (max(timestamp) - min(timestamp)) / 60000 + 1,
+                  mets = sum(mets),
+                  calories = sum(value),
+                  start_time = min(time_since_ref),
+                  end_time = max(time_since_ref) + 1,
+                  .groups = "drop") %>%
+        # rapids_features() assigns episode ids in timestamp order, so first/last are the earliest/latest episodes
+        summarise("countepisode{episode_type}" := n(),
+                  "starttimefirstepisode{episode_type}" := first(start_time),
+                  "endtimefirstepisode{episode_type}" := first(end_time),
+                  "starttimelastepisode{episode_type}" := last(start_time),
+                  "endtimelastepisode{episode_type}" := last(end_time),
+                  "starttimelongestepisode{episode_type}" := longest(duration, start_time),
+                  "endtimelongestepisode{episode_type}" := longest(duration, end_time),
+                  across(all_of(EPISODE_SUMMARY_COLUMNS), EPISODE_SUMMARY_FUNCTIONS, .names = "{.fn}{.col}episode{episode_type}"))
+}
+
+# Compute the calories intraday features of the RAPIDS provider for one time segment.
+#   sensor_data_files: input files of the Snakemake rule
+#   time_segment: label of the time segment to process
+#   provider: the [FITBIT_CALORIES_INTRADAY][PROVIDERS][RAPIDS] section of config.yaml
+# Returns one row per segment instance with local_segment and the requested feature columns.
+rapids_features <- function(sensor_data_files, time_segment, provider){
+    # NOTE(review): the file path comes from the global `snakemake` object and `sensor_data_files` is unused; confirm this is intended
+    calories <- read_csv(snakemake@input[["sensor_data"]],
+                         col_types = cols_only(level="i", mets="d", value="d", local_date_time="T",assigned_segments="c", timestamp="d"))
+    MET_THRESHOLD <- provider[["EPISODE_MET_THRESHOLD"]]
+    MVPA_LABELS <- provider[["EPISODE_MVPA_CATEGORIES"]]
+    FITBIT_LEVELS <- c("sedentary", "lightlyactive", "fairlyactive", "veryactive")
+    # `level` is coded 0-3 in the order of FITBIT_LEVELS, hence the - 1
+    MVPA_LEVELS <- which(FITBIT_LEVELS %in% MVPA_LABELS) - 1
+    EPISODE_TIME_THRESHOLD <- provider[["EPISODE_TIME_THRESHOLD"]]
+    EPISODE_REFERENCE_TIME <- provider[["EPISODE_REFERENCE_TIME"]]
+    REQUESTED_EPISODES <- provider[["EPISODE_TYPE"]]
+    REQUESTED_FEATURES <- provider[["FEATURES"]]
+
+    calories <- calories %>% filter_data_by_segment(time_segment)
+
+    if(nrow(calories) == 0)
+        return(bind_cols(lapply(REQUESTED_EPISODES, function(episode_type) episode_type_features(calories, episode_type, ""))) %>%
+            add_column(local_segment = character(), .before = 1) %>%
+            select(starts_with(c("local_segment", REQUESTED_FEATURES))))
+
+    calories %>%
+        extract(timestamps_segment, regex = "(\\d*),", into = c("segment_start_ts"), remove = TRUE, convert = TRUE) %>%
+        arrange(timestamp) %>%
+        # A new episode starts when the gap to the previous row exceeds the threshold or the label changes
+        mutate(consecutive = c(0,diff(timestamp) / 60000),
+               level_diff = c(0, diff(level)),
+               mvpa_diff = c(1, diff(if_else(level %in% MVPA_LEVELS, 1, 0))),
+               met_diff = c(1, diff(if_else(mets >= MET_THRESHOLD, 1, 0))),
+               level_episode_id = cumsum(consecutive > EPISODE_TIME_THRESHOLD | level_diff != 0),
+               mvpa_episode_id = cumsum(consecutive > EPISODE_TIME_THRESHOLD | mvpa_diff != 0),
+               met_episode_id = cumsum(consecutive > EPISODE_TIME_THRESHOLD | met_diff != 0),
+               time_since_ref = case_when(EPISODE_REFERENCE_TIME == "MIDNIGHT" ~ ((hour(local_date_time) *3600) + (minute(local_date_time) * 60) + second(local_date_time))/60,
+                                          EPISODE_REFERENCE_TIME == "START_OF_THE_SEGMENT" ~ (timestamp - segment_start_ts) / 60000)
+        ) %>%
+        select(-consecutive, -level_diff, -mvpa_diff, -met_diff) %>%
+        group_by(local_segment) %>%
+        nest() %>%
+        mutate(sedentary = map(data, ~ episode_type_features(.x %>% filter(level == 0) , "sedentary", "level_episode_id")),
+               lightlyactive = map(data, ~ episode_type_features(.x %>% filter(level == 1) , "lightlyactive", "level_episode_id")),
+               fairlyactive = map(data, ~ episode_type_features(.x %>% filter(level == 2) , "fairlyactive", "level_episode_id")),
+               veryactive = map(data, ~ episode_type_features(.x %>% filter(level == 3) , "veryactive", "level_episode_id")),
+               # Use the configured MVPA levels so the rows kept here agree with mvpa_episode_id
+               mvpa = map(data, ~ episode_type_features(.x %>% filter(level %in% MVPA_LEVELS) , "mvpa", "mvpa_episode_id")),
+               lowmet = map(data, ~ episode_type_features(.x %>% filter(mets < MET_THRESHOLD) , "lowmet", "met_episode_id")),
+               highmet = map(data, ~ episode_type_features(.x %>% filter(mets >= MET_THRESHOLD) , "highmet", "met_episode_id"))
+        ) %>%
+        ungroup() %>%
+        select(all_of(c("local_segment", REQUESTED_EPISODES))) %>%
+        unnest(everything(), keep_empty=TRUE) %>%
+        select(starts_with(c("local_segment", REQUESTED_FEATURES)))
+}
\ No newline at end of file
diff --git a/src/features/utils/utils.R b/src/features/utils/utils.R index 5ffbe492..02931503 100644 --- a/src/features/utils/utils.R +++ b/src/features/utils/utils.R @@ -11,6 +11,10 @@ filter_data_by_segment <- function(data, time_segment){ mutate(local_segment = str_extract(assigned_segments, paste0("\\[", time_segment, "#", datetime_regex, ",", datetime_regex, ";", timestamp_regex, ",", timestamp_regex, "\\]"))) %>% extract(local_segment, into = c("local_segment", "timestamps_segment"), paste0("\\[(", time_segment, "#", datetime_regex, ",", datetime_regex, ");(", timestamp_regex, ",", timestamp_regex, ")\\]")) %>% select(-assigned_segments) + + # chunk episodes + if (nrow(data) > 0 && all(c("start_timestamp","end_timestamp") %in% colnames(data)) ) + data <- chunk_episodes(data) return(data) } diff --git a/tools/config.schema.yaml b/tools/config.schema.yaml index 0d839744..317b738d 100644 --- a/tools/config.schema.yaml +++ b/tools/config.schema.yaml @@ -1125,6 +1125,47 @@ properties: INCLUDE_ZERO_STEP_ROWS: type: boolean + + FITBIT_CALORIES_INTRADAY: + type: 
object + required: [CONTAINER, PROVIDERS] + properties: + CONTAINER: + type: string + PROVIDERS: + type: ["null", object] + properties: + RAPIDS: + allOf: + - $ref: "#/definitions/PROVIDER" + - properties: + FEATURES: + uniqueItems: True + items: + type: string + enum: [count, sumduration, avgduration, minduration, maxduration, stdduration, starttimefirst, endtimefirst, starttimelast, endtimelast, starttimelongest, endtimelongest, summet, avgmet, maxmet, minmet, stdmet, sumcalories, avgcalories, maxcalories, mincalories, stdcalories] + EPISODE_TYPE: + uniqueItems: True + items: + type: string + enum: [sedentary, lightlyactive, fairlyactive, veryactive, mvpa, lowmet, highmet] + EPISODE_TIME_THRESHOLD: + type: integer + minimum: 1 + EPISODE_MET_THRESHOLD: + type: integer + minimum: 1 + EPISODE_MVPA_CATEGORIES: + uniqueItems: True + items: + type: string + enum: [sedentary, lightlyactive, fairlyactive, veryactive] + EPISODE_REFERENCE_TIME: + type: string + enum: [MIDNIGHT, START_OF_THE_SEGMENT] + additionalProperties: + $ref: "#/definitions/PROVIDER" + HISTOGRAM_PHONE_DATA_YIELD: type: object required: [PLOT]