rapids/src/features/phone_data_yield/rapids/main.R

51 lines
2.4 KiB
R
Raw Normal View History

2020-11-25 01:12:16 +01:00
library("dplyr", warn.conflicts = F)
library(tidyr)
library(readr)
2020-12-03 00:41:03 +01:00
# Compute the data-yield ratios for every instance of a time segment.
#
# Arguments:
#   data          Data frame with a numeric `timestamp` column. After
#                 filter_data_by_segment() it is expected to also carry
#                 `local_segment` and `timestamps_segment` ("start,end").
#                 NOTE(review): filter_data_by_segment() is defined outside
#                 this file; the columns it adds are inferred from how they
#                 are used below -- confirm against its definition.
#   feature_name  Not used by this function; kept so existing callers that
#                 pass it positionally keep working.
#   time_segment  Forwarded to filter_data_by_segment().
#   provider      List-like provider configuration; only
#                 `MINUTE_RATIO_THRESHOLD_FOR_VALID_YIELDED_HOURS` is read.
#
# Returns:
#   When no rows remain after filtering: an empty tibble with the columns
#   `local_segment`, `ratiovalidyieldedminutes`, `ratiovalidyieldedhours`.
#   Otherwise: one row per `local_segment` with those columns plus the
#   intermediate ones (`valid_yielded_minutes`, `valid_yielded_hours`,
#   `duration_minutes`, `duration_hours`).
compute_data_yield_features <- function(data, feature_name, time_segment, provider){
  # Timestamps are presumably epoch milliseconds (the bin widths below only
  # make sense in that unit) -- TODO confirm against the data source.
  ms_per_minute <- 60000   # 60 * 1000
  ms_per_hour <- 3600000   # 60 * 60 * 1000

  data <- data %>% filter_data_by_segment(time_segment)

  # Nothing to summarise: return an empty frame that still has the feature columns.
  if(nrow(data) == 0){
    return(tibble(local_segment = character(), ratiovalidyieldedminutes = numeric(), ratiovalidyieldedhours = numeric()))
  }

  features <- data %>%
    # "start,end" -> two columns; convert = TRUE lets tidyr turn them into numbers.
    separate(timestamps_segment, into = c("start_timestamp", "end_timestamp"), convert = TRUE, sep = ",") %>%
    mutate(duration_minutes = (end_timestamp - start_timestamp) / ms_per_minute,
           timestamp_since_segment_start = timestamp - start_timestamp,
           # Index of the minute / hour (counted from the segment start) each row falls in.
           minute_bin = timestamp_since_segment_start %/% ms_per_minute,
           hour_bin = timestamp_since_segment_start %/% ms_per_hour) %>%
    # Per segment instance and hour: how many distinct minutes contain data.
    group_by(local_segment, hour_bin) %>%
    summarise(minute_count = n_distinct(minute_bin),
              duration_minutes = first(duration_minutes),
              # Segments shorter than an hour are judged against their own length.
              valid_hour = (minute_count / min(duration_minutes, 60)) > provider$MINUTE_RATIO_THRESHOLD_FOR_VALID_YIELDED_HOURS) %>%
    # Per segment instance: aggregate the hourly rows into the final ratios.
    group_by(local_segment) %>%
    summarise(valid_yielded_minutes = sum(minute_count),
              # Dividing by 1.0 makes the count a double so that both branches
              # of if_else() below have the same type.
              valid_yielded_hours = sum(valid_hour == TRUE) / 1.0,
              duration_minutes = first(duration_minutes),
              duration_hours = duration_minutes / 60.0,
              # Both ratios are capped at 1.
              ratiovalidyieldedminutes = min(valid_yielded_minutes / duration_minutes, 1),
              ratiovalidyieldedhours = if_else(duration_hours > 1, min(valid_yielded_hours / duration_hours, 1), valid_yielded_hours))

  return(features)
}
2020-12-03 00:41:03 +01:00
# Provider entry point: read the sensor data and return the requested
# data-yield features.
#
# Arguments:
#   sensor_data_files  List-like; element "sensor_data" is the path of the CSV
#                      to read. Only the `timestamp` (double) and
#                      `assigned_segments` (character) columns are loaded.
#   time_segment       Forwarded to compute_data_yield_features().
#   provider           List-like provider configuration; `FEATURES` holds the
#                      names of the requested features. The whole object is
#                      also forwarded to compute_data_yield_features().
#
# Returns:
#   A data frame with `local_segment` plus the requested features that this
#   provider can compute ("ratiovalidyieldedminutes", "ratiovalidyieldedhours").
rapids_features <- function(sensor_data_files, time_segment, provider){
  yield_data <- read_csv(sensor_data_files[["sensor_data"]], col_types = cols_only(timestamp = "d", assigned_segments = "c"))
  requested_features <- provider[["FEATURES"]]

  # The name of the features this function can compute
  base_features_names <- c("ratiovalidyieldedminutes", "ratiovalidyieldedhours")
  # The subset of requested features this function can compute
  features_to_compute <- intersect(base_features_names, requested_features)

  # The second argument used to be an undefined `feature_name` symbol that only
  # worked because the callee never evaluates it; pass a defined value instead.
  # all_of() makes explicit that the column names come from a character vector
  # rather than from columns of the data frame.
  features <- compute_data_yield_features(yield_data, features_to_compute, time_segment, provider) %>%
    select(all_of(c("local_segment", features_to_compute)))

  return(features)
}