From ab1a3dbf792c1395785b072041e9d2ec317f1448 Mon Sep 17 00:00:00 2001
From: JulioV
Date: Wed, 3 Mar 2021 09:14:07 -0500
Subject: [PATCH] Move phone_activity_recognition to aware_mysql stream

---
 Snakefile                                     |  1 -
 docs/features/phone-activity-recognition.md   |  1 -
 example_profile/Snakefile                     |  1 -
 rules/features.smk                            |  4 +-
 src/data/streams/aware_mysql/format.yaml      | 20 +++++
 .../activity_recogniton_ios_unification.R     | 79 +++++++++++++++++++
 src/data/streams/rapids_columns.yaml          |  8 ++
 tests/Snakefile                               |  1 -
 8 files changed, 109 insertions(+), 6 deletions(-)
 create mode 100644 src/data/streams/mutations/phone/aware/activity_recogniton_ios_unification.R

diff --git a/Snakefile b/Snakefile
index 136773c0..8adfa2e7 100644
--- a/Snakefile
+++ b/Snakefile
@@ -65,7 +65,6 @@ for provider in config["PHONE_ACTIVITY_RECOGNITION"]["PROVIDERS"].keys():
     if config["PHONE_ACTIVITY_RECOGNITION"]["PROVIDERS"][provider]["COMPUTE"]:
         files_to_compute.extend(expand("data/raw/{pid}/phone_activity_recognition_raw.csv", pid=config["PIDS"]))
         files_to_compute.extend(expand("data/raw/{pid}/phone_activity_recognition_with_datetime.csv", pid=config["PIDS"]))
-        files_to_compute.extend(expand("data/raw/{pid}/phone_activity_recognition_with_datetime_unified.csv", pid=config["PIDS"]))
         files_to_compute.extend(expand("data/interim/{pid}/phone_activity_recognition_episodes.csv", pid=config["PIDS"]))
         files_to_compute.extend(expand("data/interim/{pid}/phone_activity_recognition_episodes_resampled.csv", pid=config["PIDS"]))
         files_to_compute.extend(expand("data/interim/{pid}/phone_activity_recognition_episodes_resampled_with_datetime.csv", pid=config["PIDS"]))
diff --git a/docs/features/phone-activity-recognition.md b/docs/features/phone-activity-recognition.md
index e2b791b7..e28d11ab 100644
--- a/docs/features/phone-activity-recognition.md
+++ b/docs/features/phone-activity-recognition.md
@@ -18,7 +18,6 @@ Sensor parameters description for `[PHONE_ACTIVITY_RECOGNITION]`:
 ```bash
 - data/raw/{pid}/phone_activity_recognition_raw.csv
 - data/raw/{pid}/phone_activity_recognition_with_datetime.csv
-- data/raw/{pid}/phone_activity_recognition_with_datetime_unified.csv
 - data/interim/{pid}/phone_activity_recognition_episodes.csv
 - data/interim/{pid}/phone_activity_recognition_episodes_resampled.csv
 - data/interim/{pid}/phone_activity_recognition_episodes_resampled_with_datetime.csv
diff --git a/example_profile/Snakefile b/example_profile/Snakefile
index acb3b1e1..16b62caa 100644
--- a/example_profile/Snakefile
+++ b/example_profile/Snakefile
@@ -55,7 +55,6 @@ for provider in config["PHONE_ACTIVITY_RECOGNITION"]["PROVIDERS"].keys():
     if config["PHONE_ACTIVITY_RECOGNITION"]["PROVIDERS"][provider]["COMPUTE"]:
         files_to_compute.extend(expand("data/raw/{pid}/phone_activity_recognition_raw.csv", pid=config["PIDS"]))
         files_to_compute.extend(expand("data/raw/{pid}/phone_activity_recognition_with_datetime.csv", pid=config["PIDS"]))
-        files_to_compute.extend(expand("data/raw/{pid}/phone_activity_recognition_with_datetime_unified.csv", pid=config["PIDS"]))
         files_to_compute.extend(expand("data/interim/{pid}/phone_activity_recognition_episodes.csv", pid=config["PIDS"]))
         files_to_compute.extend(expand("data/interim/{pid}/phone_activity_recognition_episodes_resampled.csv", pid=config["PIDS"]))
         files_to_compute.extend(expand("data/interim/{pid}/phone_activity_recognition_episodes_resampled_with_datetime.csv", pid=config["PIDS"]))
diff --git a/rules/features.smk b/rules/features.smk
index 77190b04..01ee50f7 100644
--- a/rules/features.smk
+++ b/rules/features.smk
@@ -62,9 +62,9 @@ rule phone_accelerometer_r_features:
 
 rule activity_recognition_episodes:
     input:
-        sensor_data = "data/raw/{pid}/phone_activity_recognition_with_datetime_unified.csv"
+        sensor_data = "data/raw/{pid}/phone_activity_recognition_with_datetime.csv"
     params:
-        episode_threshold_between_rows = config["PHONE_BATTERY"]["EPISODE_THRESHOLD_BETWEEN_ROWS"]
+        episode_threshold_between_rows = config["PHONE_ACTIVITY_RECOGNITION"]["EPISODE_THRESHOLD_BETWEEN_ROWS"]
     output:
         "data/interim/{pid}/phone_activity_recognition_episodes.csv"
     script:
diff --git a/src/data/streams/aware_mysql/format.yaml b/src/data/streams/aware_mysql/format.yaml
index 0bac19e4..01dad282 100644
--- a/src/data/streams/aware_mysql/format.yaml
+++ b/src/data/streams/aware_mysql/format.yaml
@@ -16,3 +16,23 @@ PHONE_ACCELEROMETER:
       DOUBLE_VALUES_2: double_values_2
     MUTATION_SCRIPTS: # List any python or r scripts that mutate your raw data
 
+PHONE_ACTIVITY_RECOGNITION:
+  ANDROID:
+    COLUMN_MAPPINGS:
+      TIMESTAMP: timestamp
+      DEVICE_ID: device_id
+      ACTIVITY_TYPE: activity_type
+      ACTIVITY_NAME: activity_name
+      CONFIDENCE: confidence
+    MUTATION_SCRIPTS: # List any python or r scripts that mutate your raw data
+  IOS:
+    COLUMN_MAPPINGS:
+      TIMESTAMP: timestamp
+      DEVICE_ID: device_id
+      ACTIVITY_TYPE: FLAG_TO_MUTATE
+      ACTIVITY_NAME: FLAG_TO_MUTATE
+      CONFIDENCE: confidence
+      FLAG_AS_EXTRA: activities
+    MUTATION_SCRIPTS:
+      - "src/data/streams/mutations/phone/aware/activity_recogniton_ios_unification.R"
+
diff --git a/src/data/streams/mutations/phone/aware/activity_recogniton_ios_unification.R b/src/data/streams/mutations/phone/aware/activity_recogniton_ios_unification.R
new file mode 100644
index 00000000..b5d49eae
--- /dev/null
+++ b/src/data/streams/mutations/phone/aware/activity_recogniton_ios_unification.R
@@ -0,0 +1,79 @@
+source("renv/activate.R")
+library("dplyr", warn.conflicts = FALSE)
+library(stringr)
+
+# Reduce the raw iOS "activities" column to one activity label per row.
+#
+# Removes the double quotes and square brackets from every value, stops if any
+# comma-separated combination other than "stationary,automotive" is present,
+# and rewrites "stationary,automotive" as "automotive".
+#
+# ios_gar: data frame with an "activities" column.
+# Returns ios_gar with the cleaned "activities" column.
+clean_ios_activity_column <- function(ios_gar){
+  ios_gar <- ios_gar %>%
+    mutate(activities = str_replace_all(activities, pattern = '("|\\[|\\])', replacement = ""))
+
+  # every distinct value that still lists more than one activity
+  existent_multiple_activities <- ios_gar %>%
+    filter(str_detect(activities, ",")) %>%
+    distinct(activities) %>%
+    pull(activities)
+
+  known_multiple_activities <- c("stationary,automotive")
+  unkown_multiple_actvities <- setdiff(existent_multiple_activities, known_multiple_activities)
+  if(length(unkown_multiple_actvities) > 0){
+    # collapse first, otherwise stop() glues one full copy of the message per combination
+    stop(paste0("There are unkwown combinations of ios activities, you need to implement the decision of the ones to keep: ",
+                paste(unkown_multiple_actvities, collapse = "; ")))
+  }
+
+  ios_gar <- ios_gar %>%
+    mutate(activities = str_replace_all(activities, pattern = "stationary,automotive", replacement = "automotive"))
+
+  return(ios_gar)
+}
+
+# Convert iOS activity recognition rows to the Android column layout.
+#
+# Drops rows whose "activities" value is blank, cleans that column, derives
+# "activity_name" and "activity_type" from it, maps "confidence" 0/1/2 to
+# 0/50/100 and removes "activities". Values without a mapping below become NA.
+#
+# ios_gar: data frame with "activities" and "confidence" columns.
+# Returns the unified data frame.
+unify_ios_activity_recognition <- function(ios_gar){
+  # We only need to unify Google Activity Recognition data for iOS
+  # discard rows where activities column is blank. A logical mask is used because
+  # ios_gar[-which(...), ] selects zero rows when no value is blank
+  ios_gar <- ios_gar[!(ios_gar$activities %in% ""), ]
+  # clean "activities" column of ios_gar
+  ios_gar <- clean_ios_activity_column(ios_gar)
+
+  # make it compatible with android version: generate "activity_name" and "activity_type" columns
+  ios_gar <- ios_gar %>%
+    mutate(activity_name = case_when(activities == "automotive" ~ "in_vehicle",
+                                     activities == "cycling" ~ "on_bicycle",
+                                     activities == "walking" ~ "walking",
+                                     activities == "running" ~ "running",
+                                     activities == "stationary" ~ "still",
+                                     activities == "unknown" ~ "unknown"),
+           activity_type = case_when(activities == "automotive" ~ 0,
+                                     activities == "cycling" ~ 1,
+                                     activities == "walking" ~ 7,
+                                     activities == "running" ~ 8,
+                                     activities == "stationary" ~ 3,
+                                     activities == "unknown" ~ 4),
+           confidence = case_when(confidence == 0 ~ 0,
+                                  confidence == 1 ~ 50,
+                                  confidence == 2 ~ 100)
+          ) %>%
+    select(-activities)
+
+  return(ios_gar)
+}
+
+# Forwards data to unify_ios_activity_recognition(); presumably the hook called for MUTATION_SCRIPTS entries (confirm)
+main <- function(data){
+  return(unify_ios_activity_recognition(data))
+}
\ No newline at end of file
diff --git a/src/data/streams/rapids_columns.yaml b/src/data/streams/rapids_columns.yaml
index f05fbb15..3992e9ba 100644
--- a/src/data/streams/rapids_columns.yaml
+++ b/src/data/streams/rapids_columns.yaml
@@ -4,3 +4,11 @@ PHONE_ACCELEROMETER:
   - DOUBLE_VALUES_0
   - DOUBLE_VALUES_1
   - DOUBLE_VALUES_2
+
+PHONE_ACTIVITY_RECOGNITION:
+  - TIMESTAMP
+  - DEVICE_ID
+  - ACTIVITY_TYPE
+  - ACTIVITY_NAME
+  - CONFIDENCE
+
diff --git a/tests/Snakefile b/tests/Snakefile
index 8bc2b96c..bf3e769f 100644
--- a/tests/Snakefile
+++ b/tests/Snakefile
@@ -63,7 +63,6 @@ for provider in config["PHONE_ACTIVITY_RECOGNITION"]["PROVIDERS"].keys():
     if config["PHONE_ACTIVITY_RECOGNITION"]["PROVIDERS"][provider]["COMPUTE"]:
         files_to_compute.extend(expand("data/raw/{pid}/phone_activity_recognition_raw.csv", pid=config["PIDS"]))
         files_to_compute.extend(expand("data/raw/{pid}/phone_activity_recognition_with_datetime.csv", pid=config["PIDS"]))
-        files_to_compute.extend(expand("data/raw/{pid}/phone_activity_recognition_with_datetime_unified.csv", pid=config["PIDS"]))
         files_to_compute.extend(expand("data/interim/{pid}/phone_activity_recognition_episodes.csv", pid=config["PIDS"]))
         files_to_compute.extend(expand("data/interim/{pid}/phone_activity_recognition_episodes_resampled.csv", pid=config["PIDS"]))
         files_to_compute.extend(expand("data/interim/{pid}/phone_activity_recognition_episodes_resampled_with_datetime.csv", pid=config["PIDS"]))