diff --git a/Snakefile b/Snakefile
index cbd8cb60..7082fc10 100644
--- a/Snakefile
+++ b/Snakefile
@@ -66,10 +66,10 @@ for provider in config["PHONE_BATTERY"]["PROVIDERS"].keys():
 
 for provider in config["PHONE_SCREEN"]["PROVIDERS"].keys():
     if config["PHONE_SCREEN"]["PROVIDERS"][provider]["COMPUTE"]:
-        if "PHONE_SCREEN" in config["PHONE_DATA_YIELD"]["SENSORS"]:
-            files_to_compute.extend(expand("data/interim/{pid}/phone_sensed_bins.csv", pid=config["PIDS"]))
-        else:
-            raise ValueError("Error: Add PHONE_SCREEN (and as many phone sensor as you have in your database) to [PHONE_DATA_YIELD][SENSORS] in config.yaml. This is necessary to compute phone_sensed_bins (bins of time when the smartphone was sensing data)")
+        # if "PHONE_SCREEN" in config["PHONE_DATA_YIELD"]["SENSORS"]:# not used for now because we took episodepersensedminutes out of the list of supported features
+        #     files_to_compute.extend(expand("data/interim/{pid}/phone_yielded_timestamps.csv", pid=config["PIDS"]))
+        # else:
+        #     raise ValueError("Error: Add PHONE_SCREEN (and as many PHONE_SENSORS as you have in your database) to [PHONE_DATA_YIELD][SENSORS] in config.yaml. This is necessary to compute phone_yielded_timestamps (time when the smartphone was sensing data)")
         files_to_compute.extend(expand("data/raw/{pid}/phone_screen_raw.csv", pid=config["PIDS"]))
         files_to_compute.extend(expand("data/raw/{pid}/phone_screen_with_datetime.csv", pid=config["PIDS"]))
         files_to_compute.extend(expand("data/raw/{pid}/phone_screen_with_datetime_unified.csv", pid=config["PIDS"]))
@@ -127,9 +127,9 @@ for provider in config["PHONE_LOCATIONS"]["PROVIDERS"].keys():
     if config["PHONE_LOCATIONS"]["PROVIDERS"][provider]["COMPUTE"]:
         if config["PHONE_LOCATIONS"]["LOCATIONS_TO_USE"] == "FUSED_RESAMPLED":
             if "PHONE_LOCATIONS" in config["PHONE_DATA_YIELD"]["SENSORS"]:
-                files_to_compute.extend(expand("data/interim/{pid}/phone_sensed_bins.csv", pid=config["PIDS"]))
+                files_to_compute.extend(expand("data/interim/{pid}/phone_yielded_timestamps.csv", pid=config["PIDS"]))
             else:
-                raise ValueError("Error: Add PHONE_LOCATIONS (and as many SENSORS as you have) to [PHONE_DATA_YIELD][SENSORS] in config.yaml. This is necessary to compute phone_sensed_bins (bins of time when the smartphone was sensing data) which is used to resample fused location data (RESAMPLED_FUSED)")
+                raise ValueError("Error: Add PHONE_LOCATIONS (and as many PHONE_SENSORS as you have) to [PHONE_DATA_YIELD][SENSORS] in config.yaml. This is necessary to compute phone_yielded_timestamps (time when the smartphone was sensing data) which is used to resample fused location data (RESAMPLED_FUSED)")
 
         files_to_compute.extend(expand("data/raw/{pid}/phone_locations_raw.csv", pid=config["PIDS"]))
         files_to_compute.extend(expand("data/interim/{pid}/phone_locations_processed.csv", pid=config["PIDS"]))
diff --git a/config.yaml b/config.yaml
index eb1b3f09..73db75c4 100644
--- a/config.yaml
+++ b/config.yaml
@@ -55,10 +55,10 @@ DEVICE_DATA:
 ################################################################################
 
 PHONE_DATA_YIELD:
-  SENSORS: [PHONE_MESSAGES, PHONE_CALLS, PHONE_ACCELEROMETER]
+  SENSORS: []
   PROVIDERS:
     RAPIDS:
-      COMPUTE: True
+      COMPUTE: False
       FEATURES: [ratiovalidyieldedminutes, ratiovalidyieldedhours]
       MINUTE_RATIO_THRESHOLD_FOR_VALID_YIELDED_HOURS: 0.5 # 0 to 1 representing the number of minutes with at least
       SRC_LANGUAGE: "r"
diff --git a/docs/features/add-new-features.md b/docs/features/add-new-features.md
index d718b474..b5c3019c 100644
--- a/docs/features/add-new-features.md
+++ b/docs/features/add-new-features.md
@@ -139,7 +139,7 @@ The code to extract your behavioral features should be implemented in your provi
 
     - One row per day segment instance (e.g. 14 our `p01`'s `my_days` example)
    - The `local_segment` column added by `filter_data_by_segment()`
-    - One column per feature. Your feature columns should be named `SENSOR_PROVIDER_FEATURE`, for example `accelerometr_vega_feature1`
+    - One column per feature. By convention the name of your features should only contain letters or numbers (`feature1`). RAPIDS will automatically add the right sensor and provider prefix (`accelerometr_vega_`)
 
 ??? example "`PHONE_ACCELEROMETER` Provider Example"
     For your reference, this a short example of our own provider (`RAPIDS`) for `PHONE_ACCELEROMETER` that computes five acceleration features
diff --git a/rules/preprocessing.smk b/rules/preprocessing.smk
index 5b4881ee..eb539ca7 100644
--- a/rules/preprocessing.smk
+++ b/rules/preprocessing.smk
@@ -80,6 +80,8 @@ rule phone_readable_datetime:
 rule phone_yielded_timestamps:
     input:
         all_sensors = expand("data/raw/{{pid}}/{sensor}_raw.csv", sensor = map(str.lower, config["PHONE_DATA_YIELD"]["SENSORS"]))
+    params:
+        sensors = config["PHONE_DATA_YIELD"]["SENSORS"] # not used but needed so the rule is triggered if this array changes
     output:
         "data/interim/{pid}/phone_yielded_timestamps.csv"
     script:
@@ -99,18 +101,6 @@ rule phone_yielded_timestamps_with_datetime:
     script:
         "../src/data/readable_datetime.R"
 
-rule phone_valid_sensed_days:
-    input:
-        phone_sensed_bins = "data/interim/{pid}/phone_sensed_bins.csv"
-    params:
-        min_valid_hours_per_day = "{min_valid_hours_per_day}",
-        min_valid_bins_per_hour = "{min_valid_bins_per_hour}"
-    output:
-        "data/interim/{pid}/phone_valid_sensed_days_{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins.csv"
-    script:
-        "../src/data/phone_valid_sensed_days.R"
-
-
 rule unify_ios_android:
     input:
         sensor_data = "data/raw/{pid}/{sensor}_with_datetime.csv",
@@ -125,7 +115,7 @@ rule unify_ios_android:
 rule process_phone_locations_types:
     input:
         locations = "data/raw/{pid}/phone_locations_raw.csv",
-        phone_sensed_timestamps = "data/interim/{pid}/phone_sensed_timestamps.csv",
+        phone_sensed_timestamps = "data/interim/{pid}/phone_yielded_timestamps.csv",
     params:
         consecutive_threshold = config["PHONE_LOCATIONS"]["FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD"],
         time_since_valid_location = config["PHONE_LOCATIONS"]["FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION"],
diff --git a/src/features/phone_bluetooth/rapids/main.R b/src/features/phone_bluetooth/rapids/main.R
index 3389b7de..3f8af8c1 100644
--- a/src/features/phone_bluetooth/rapids/main.R
+++ b/src/features/phone_bluetooth/rapids/main.R
@@ -6,8 +6,8 @@ compute_bluetooth_feature <- function(data, feature, day_segment){
   if(feature %in% c("countscans", "uniquedevices")){
     data <- data %>% group_by(local_segment)
     data <- switch(feature,
-            "countscans" = data %>% summarise(!!paste("bluetooth_rapids", feature, sep = "_") := n()),
-            "uniquedevices" = data %>% summarise(!!paste("bluetooth_rapids", feature, sep = "_") := n_distinct(bt_address)))
+            "countscans" = data %>% summarise(!!feature := n()),
+            "uniquedevices" = data %>% summarise(!!feature := n_distinct(bt_address)))
     return(data)
   } else if(feature == "countscansmostuniquedevice"){
     # Get the most scanned device
@@ -22,7 +22,7 @@ compute_bluetooth_feature <- function(data, feature, day_segment){
     return(data %>%
              filter(bt_address == mostuniquedevice) %>%
              group_by(local_segment) %>%
-             summarise(!!paste("bluetooth_rapids", feature, sep = "_") := n()) %>%
+             summarise(!!feature := n()) %>%
              replace(is.na(.), 0))
   }
 }
@@ -46,7 +46,7 @@ rapids_features <- function(sensor_data_files, day_segment, provider){
       features <- merge(features, feature, by="local_segment", all = TRUE)
   }
 
-  features <- features %>% mutate_at(vars(contains("countscansmostuniquedevice")), list( ~ replace_na(., 0)))
+  features <- features %>% mutate_at(vars(contains("countscansmostuniquedevice")), list( ~ replace_na(., 0))) %>% select(-local_segment)
   return(features)
 }
\ No newline at end of file
diff --git a/src/features/phone_calls/rapids/main.R b/src/features/phone_calls/rapids/main.R
index 3c383276..1bd1c897 100644
--- a/src/features/phone_calls/rapids/main.R
+++ b/src/features/phone_calls/rapids/main.R
@@ -20,7 +20,7 @@ call_features_of_type <- function(calls, call_type, day_segment, requested_featu
   if(length(features_to_compute) == 0)
     return(features)
   if(nrow(calls) < 1)
-    return(cbind(features, read.csv(text = paste(paste("calls_rapids", call_type, features_to_compute, sep = "_"), collapse = ","), stringsAsFactors = FALSE)))
+    return(cbind(features, read.csv(text = paste(paste(call_type, features_to_compute, sep = "_"), collapse = ","), stringsAsFactors = FALSE)))
 
   for(feature_name in features_to_compute){
     if(feature_name == "countmostfrequentcontact"){
@@ -34,24 +34,24 @@ call_features_of_type <- function(calls, call_type, day_segment, requested_featu
         pull(trace)
       feature <- calls %>%
         group_by(local_segment) %>%
-        summarise(!!paste("calls_rapids", call_type, feature_name, sep = "_") := sum(trace == mostfrequentcontact))
+        summarise(!!paste(call_type, feature_name, sep = "_") := sum(trace == mostfrequentcontact))
       features <- merge(features, feature, by="local_segment", all = TRUE)
     } else {
       feature <- calls %>% group_by(local_segment)
       feature <- switch(feature_name,
-              "count" = feature %>% summarise(!!paste("calls_rapids", call_type, feature_name, sep = "_") := n()),
-              "distinctcontacts" = feature %>% summarise(!!paste("calls_rapids", call_type, feature_name, sep = "_") := n_distinct(trace)),
-              "meanduration" = feature %>% summarise(!!paste("calls_rapids", call_type, feature_name, sep = "_") := mean(call_duration)),
-              "sumduration" = feature %>% summarise(!!paste("calls_rapids", call_type, feature_name, sep = "_") := sum(call_duration)),
-              "minduration" = feature %>% summarise(!!paste("calls_rapids", call_type, feature_name, sep = "_") := min(call_duration)),
-              "maxduration" = feature %>% summarise(!!paste("calls_rapids", call_type, feature_name, sep = "_") := max(call_duration)),
-              "stdduration" = feature %>% summarise(!!paste("calls_rapids", call_type, feature_name, sep = "_") := sd(call_duration)),
-              "modeduration" = feature %>% summarise(!!paste("calls_rapids", call_type, feature_name, sep = "_") := Mode(call_duration)),
-              "entropyduration" = feature %>% summarise(!!paste("calls_rapids", call_type, feature_name, sep = "_") := entropy.MillerMadow(call_duration)),
-              "timefirstcall" = feature %>% summarise(!!paste("calls_rapids", call_type, feature_name, sep = "_") := first(local_hour) * 60 + first(local_minute)),
-              "timelastcall" = feature %>% summarise(!!paste("calls_rapids", call_type, feature_name, sep = "_") := last(local_hour) * 60 + last(local_minute)))
+              "count" = feature %>% summarise(!!paste(call_type, feature_name, sep = "_") := n()),
+              "distinctcontacts" = feature %>% summarise(!!paste(call_type, feature_name, sep = "_") := n_distinct(trace)),
+              "meanduration" = feature %>% summarise(!!paste(call_type, feature_name, sep = "_") := mean(call_duration)),
+              "sumduration" = feature %>% summarise(!!paste(call_type, feature_name, sep = "_") := sum(call_duration)),
+              "minduration" = feature %>% summarise(!!paste(call_type, feature_name, sep = "_") := min(call_duration)),
+              "maxduration" = feature %>% summarise(!!paste(call_type, feature_name, sep = "_") := max(call_duration)),
+              "stdduration" = feature %>% summarise(!!paste(call_type, feature_name, sep = "_") := sd(call_duration)),
+              "modeduration" = feature %>% summarise(!!paste(call_type, feature_name, sep = "_") := Mode(call_duration)),
+              "entropyduration" = feature %>% summarise(!!paste(call_type, feature_name, sep = "_") := entropy.MillerMadow(call_duration)),
+              "timefirstcall" = feature %>% summarise(!!paste(call_type, feature_name, sep = "_") := first(local_hour) * 60 + first(local_minute)),
+              "timelastcall" = feature %>% summarise(!!paste(call_type, feature_name, sep = "_") := last(local_hour) * 60 + last(local_minute)))
 
       features <- merge(features, feature, by="local_segment", all = TRUE)
     }
diff --git a/src/features/phone_data_yield/rapids/main.R b/src/features/phone_data_yield/rapids/main.R
index 6a178ded..b51fd8fa 100644
--- a/src/features/phone_data_yield/rapids/main.R
+++ b/src/features/phone_data_yield/rapids/main.R
@@ -19,8 +19,8 @@ compute_data_yield_features <- function(data, feature_name, day_segment, provide
               valid_yielded_hours = sum(valid_hour == TRUE) / 1.0,
               duration_minutes = first(duration_minutes),
               duration_hours = duration_minutes / 60.0,
-              phone_data_yield_rapids_ratiovalidyieldedminutes = valid_yielded_minutes / duration_minutes,
-              phone_data_yield_rapids_ratiovalidyieldedhours = if_else(duration_hours > 1, valid_yielded_hours / duration_hours, valid_yielded_hours))
+              ratiovalidyieldedminutes = valid_yielded_minutes / duration_minutes,
+              ratiovalidyieldedhours = if_else(duration_hours > 1, valid_yielded_hours / duration_hours, valid_yielded_hours))
   return(features)
 }
 
@@ -41,7 +41,7 @@ rapids_features <- function(sensor_data_files, day_segment, provider){
   features_to_compute <- intersect(base_features_names, requested_features)
 
   features <- compute_data_yield_features(yield_data, feature_name, day_segment, provider) %>%
-    select(c("local_segment", paste0("phone_data_yield_rapids_", features_to_compute)))
+    select(c("local_segment", features_to_compute))
 
   return(features)
 }
\ No newline at end of file
diff --git a/src/features/phone_locations/barnett/main.R b/src/features/phone_locations/barnett/main.R
index ad544e1b..b4454464 100644
--- a/src/features/phone_locations/barnett/main.R
+++ b/src/features/phone_locations/barnett/main.R
@@ -8,22 +8,22 @@ sapply(file.sources,source,.GlobalEnv)
 
 create_empty_file <- function(requested_features){
   return(data.frame(local_segment= character(),
-                    locations_barnett_hometime= numeric(),
-                    locations_barnett_disttravelled= numeric(),
-                    locations_barnett_rog= numeric(),
-                    locations_barnett_maxdiam= numeric(),
-                    locations_barnett_maxhomedist= numeric(),
-                    locations_barnett_siglocsvisited= numeric(),
-                    locations_barnett_avgflightlen= numeric(),
-                    locations_barnett_stdflightlen= numeric(),
-                    locations_barnett_avgflightdur= numeric(),
-                    locations_barnett_stdflightdur= numeric(),
-                    locations_barnett_probpause= numeric(),
-                    locations_barnett_siglocentropy= numeric(),
-                    locations_barnett_minsmissing= numeric(),
-                    locations_barnett_circdnrtn= numeric(),
-                    locations_barnett_wkenddayrtn= numeric(),
-                    locations_barnett_minutes_data_used= numeric()
+                    hometime= numeric(),
+                    disttravelled= numeric(),
+                    rog= numeric(),
+                    maxdiam= numeric(),
+                    maxhomedist= numeric(),
+                    siglocsvisited= numeric(),
+                    avgflightlen= numeric(),
+                    stdflightlen= numeric(),
+                    avgflightdur= numeric(),
+                    stdflightdur= numeric(),
+                    probpause= numeric(),
+                    siglocentropy= numeric(),
+                    minsmissing= numeric(),
+                    circdnrtn= numeric(),
+                    wkenddayrtn= numeric(),
+                    minutes_data_used= numeric()
                     ) %>% select(all_of(requested_features)))
 }
 
@@ -41,9 +41,9 @@ barnett_features <- function(sensor_data_files, day_segment, params){
     available_features <- c("hometime","disttravelled","rog","maxdiam", "maxhomedist","siglocsvisited","avgflightlen", "stdflightlen", "avgflightdur","stdflightdur", "probpause","siglocentropy","minsmissing", "circdnrtn","wkenddayrtn")
     requested_features <- intersect(unlist(params["FEATURES"], use.names = F), available_features)
-    requested_features <- c("local_segment", paste("locations_barnett", requested_features, sep = "_"))
+    requested_features <- c("local_segment", requested_features)
     if(minutes_data_used)
-        requested_features <- c(requested_features, "locations_barnett_minutes_data_used")
+        requested_features <- c(requested_features, "minutes_data_used")
 
     # Excludes datasets with less than 24 hours of data
     if(max(location$timestamp) - min(location$timestamp) < 86400000)
@@ -67,8 +67,8 @@ barnett_features <- function(sensor_data_files, day_segment, params){
         group_by(local_date, local_hour) %>%
         summarise(n_minutes = n_distinct(local_minute)) %>%
         group_by(local_date) %>%
-        summarise(locations_barnett_minutes_data_used = sum(n_minutes)) %>%
-        select(local_date, locations_barnett_minutes_data_used)
+        summarise(minutes_data_used = sum(n_minutes)) %>%
+        select(local_date, minutes_data_used)
 
     # Save day segment to attach it later
     location_dates_segments <- location %>% select(local_date, local_segment) %>% distinct(local_date, .keep_all = TRUE)
@@ -89,7 +89,7 @@ barnett_features <- function(sensor_data_files, day_segment, params){
     features <- cbind(rownames(outputMobility$featavg), outputMobility$featavg)
     features <- as.data.frame(features)
     features[-1] <- lapply(lapply(features[-1], as.character), as.numeric)
-    colnames(features)=c("local_date",tolower(paste("locations_barnett", colnames(outputMobility$featavg), sep = "_")))
+    colnames(features)=c("local_date",tolower(colnames(outputMobility$featavg)))
     # Add the minute count column
     features <- left_join(features, location_minutes_used, by = "local_date")
     # Add the day segment column for consistency
diff --git a/src/features/phone_messages/rapids/main.R b/src/features/phone_messages/rapids/main.R
index 457dc4be..ac566016 100644
--- a/src/features/phone_messages/rapids/main.R
+++ b/src/features/phone_messages/rapids/main.R
@@ -15,7 +15,7 @@ message_features_of_type <- function(messages, messages_type, day_segment, reque
   if(length(features_to_compute) == 0)
     return(features)
   if(nrow(messages) < 1)
-    return(cbind(features, read.csv(text = paste(paste("messages_rapids", messages_type, features_to_compute, sep = "_"), collapse = ","), stringsAsFactors = FALSE)))
+    return(cbind(features, read.csv(text = paste(paste(messages_type, features_to_compute, sep = "_"), collapse = ","), stringsAsFactors = FALSE)))
 
   for(feature_name in features_to_compute){
     if(feature_name == "countmostfrequentcontact"){
@@ -29,17 +29,17 @@ message_features_of_type <- function(messages, messages_type, day_segment, reque
         pull(trace)
       feature <- messages %>%
        group_by(local_segment) %>%
-        summarise(!!paste("messages_rapids", messages_type, feature_name, sep = "_") := sum(trace == mostfrequentcontact))
+        summarise(!!paste(messages_type, feature_name, sep = "_") := sum(trace == mostfrequentcontact))
       features <- merge(features, feature, by="local_segment", all = TRUE)
     } else {
       feature <- messages %>% group_by(local_segment)
       feature <- switch(feature_name,
-          "count" = feature %>% summarise(!!paste("messages_rapids", messages_type, feature_name, sep = "_") := n()),
-          "distinctcontacts" = feature %>% summarise(!!paste("messages_rapids", messages_type, feature_name, sep = "_") := n_distinct(trace)),
-          "timefirstmessage" = feature %>% summarise(!!paste("messages_rapids", messages_type, feature_name, sep = "_") := first(local_hour) * 60 + first(local_minute)),
-          "timelastmessage" = feature %>% summarise(!!paste("messages_rapids", messages_type, feature_name, sep = "_") := last(local_hour) * 60 + last(local_minute)))
+          "count" = feature %>% summarise(!!paste(messages_type, feature_name, sep = "_") := n()),
+          "distinctcontacts" = feature %>% summarise(!!paste(messages_type, feature_name, sep = "_") := n_distinct(trace)),
+          "timefirstmessage" = feature %>% summarise(!!paste(messages_type, feature_name, sep = "_") := first(local_hour) * 60 + first(local_minute)),
+          "timelastmessage" = feature %>% summarise(!!paste(messages_type, feature_name, sep = "_") := last(local_hour) * 60 + last(local_minute)))
 
       features <- merge(features, feature, by="local_segment", all = TRUE)
     }
@@ -57,7 +57,7 @@ rapids_features <- function(sensor_data_files, day_segment, provider){
     # Filter rows that belong to the message type and day segment of interest
     message_type_label = ifelse(message_type == "received", "1", ifelse(message_type == "sent", "2", NA))
     if(is.na(message_type_label))
-      stop(paste("Message type can online be received or sent but instead you typed: ", message_type, " in config[MESSAGES][MESSAGES_TYPES]"))
+      stop(paste("Message type can online be received or sent but instead you typed: ", message_type, " in config[PHONE_MESSAGES][MESSAGES_TYPES]"))
 
     requested_features <- provider[["FEATURES"]][[message_type]]
     messages_of_type <- messages_data %>% filter(message_type == message_type_label)
diff --git a/src/features/phone_wifi_connected/rapids/main.R b/src/features/phone_wifi_connected/rapids/main.R
index eb2f1a9c..84513249 100644
--- a/src/features/phone_wifi_connected/rapids/main.R
+++ b/src/features/phone_wifi_connected/rapids/main.R
@@ -5,8 +5,8 @@ compute_wifi_feature <- function(data, feature, day_segment){
   if(feature %in% c("countscans", "uniquedevices")){
     data <- data %>% group_by(local_segment)
     data <- switch(feature,
-            "countscans" = data %>% summarise(!!paste("wifi_rapids", feature, sep = "_") := n()),
-            "uniquedevices" = data %>% summarise(!!paste("wifi_rapids", feature, sep = "_") := n_distinct(bssid)))
+            "countscans" = data %>% summarise(!!feature := n()),
+            "uniquedevices" = data %>% summarise(!!feature := n_distinct(bssid)))
     return(data)
   } else if(feature == "countscansmostuniquedevice"){
     # Get the most scanned device
@@ -20,7 +20,7 @@ compute_wifi_feature <- function(data, feature, day_segment){
     return(data %>%
             filter(bssid == mostuniquedevice) %>%
             group_by(local_segment) %>%
-             summarise(!!paste("wifi_rapids", feature, sep = "_") := n()) %>%
+             summarise(!!feature := n()) %>%
             replace(is.na(.), 0))
   }
 }
diff --git a/src/features/phone_wifi_visible/rapids/main.R b/src/features/phone_wifi_visible/rapids/main.R
index eb2f1a9c..84513249 100644
--- a/src/features/phone_wifi_visible/rapids/main.R
+++ b/src/features/phone_wifi_visible/rapids/main.R
@@ -5,8 +5,8 @@ compute_wifi_feature <- function(data, feature, day_segment){
   if(feature %in% c("countscans", "uniquedevices")){
     data <- data %>% group_by(local_segment)
     data <- switch(feature,
-            "countscans" = data %>% summarise(!!paste("wifi_rapids", feature, sep = "_") := n()),
-            "uniquedevices" = data %>% summarise(!!paste("wifi_rapids", feature, sep = "_") := n_distinct(bssid)))
+            "countscans" = data %>% summarise(!!feature := n()),
+            "uniquedevices" = data %>% summarise(!!feature := n_distinct(bssid)))
     return(data)
   } else if(feature == "countscansmostuniquedevice"){
     # Get the most scanned device
@@ -20,7 +20,7 @@ compute_wifi_feature <- function(data, feature, day_segment){
     return(data %>%
            filter(bssid == mostuniquedevice) %>%
            group_by(local_segment) %>%
-             summarise(!!paste("wifi_rapids", feature, sep = "_") := n()) %>%
+             summarise(!!feature := n()) %>%
            replace(is.na(.), 0))
  }
 }
diff --git a/src/features/utils/utils.R b/src/features/utils/utils.R
index 3ff3caac..3a03bafb 100644
--- a/src/features/utils/utils.R
+++ b/src/features/utils/utils.R
@@ -61,12 +61,9 @@ fetch_provider_features <- function(provider, provider_key, sensor_key, sensor_d
         print(paste(rapids_log_tag,"Processing", sensor_key, provider_key, day_segment))
 
         features <- features_function(sensor_data_files, day_segment, provider)
-
-        # Check all features names contain the provider key so they are unique
-        features_names <- colnames(features %>% select(-local_segment))
-        if(any(!grepl(paste0(".*(",str_to_lower(provider_key),").*"), features_names)))
-          stop(paste("The name of all calls features of", provider_key," must contain its name in lower case but the following don't [", paste(features_names[!grepl(paste0(".*(",str_to_lower(provider_key),").*"), features_names)], collapse = ", "), "]"))
-
+        if(!"local_segment" %in% colnames(features))
+          stop(paste0("The dataframe returned by the ",sensor_key," provider '", provider_key,"' is missing the 'local_segment' column added by the 'filter_data_by_segment()' function. Check the provider script is using such function and is not removing 'local_segment' by accident (", code_path,")\n The 'local_segment' column is used to index a provider's features (each row corresponds to a different day segment instance (e.g. 2020-01-01, 2020-01-02, 2020-01-03, etc.)"))
+        features <- features %>% rename_at(vars(!matches("local_segment")), ~ paste(sensor_key, provider_key, ., sep = "_"))
         sensor_features <- merge(sensor_features, features, all = TRUE)
       }
   } else { # This is redundant, if COMPUTE is FALSE this script will be never executed
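Note on the naming contract this diff introduces (not part of the commit): each provider now returns a `local_segment` column plus feature columns with plain names, and `fetch_provider_features()` in src/features/utils/utils.R prefixes every other column with the sensor and provider keys. A minimal R sketch of that behavior follows; the `phone_bluetooth`/`rapids` keys and the toy values are illustrative only.

library(dplyr)

# Hypothetical provider output: one row per day segment instance,
# a 'local_segment' column, and plain feature names (no prefix).
features <- data.frame(
  local_segment = c("2020-01-01", "2020-01-02"),
  countscans    = c(10, 4),
  uniquedevices = c(3, 2)
)

# What the updated utils.R does after the provider returns:
# prepend the sensor and provider keys to every non-'local_segment' column.
sensor_key   <- "phone_bluetooth"  # illustrative
provider_key <- "rapids"           # illustrative
features <- features %>%
  rename_at(vars(!matches("local_segment")), ~ paste(sensor_key, provider_key, ., sep = "_"))

# local_segment, phone_bluetooth_rapids_countscans, phone_bluetooth_rapids_uniquedevices
print(colnames(features))

Centralizing the prefix in utils.R keeps feature names unique across sensors without each provider having to repeat the sensor and provider strings in every summarise() call.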