Move phone_activity_recognition to aware_mysql stream

pull/128/head
JulioV 2021-03-03 09:14:07 -05:00
parent dc11cb593d
commit ab1a3dbf79
8 changed files with 88 additions and 6 deletions

View File

@ -65,7 +65,6 @@ for provider in config["PHONE_ACTIVITY_RECOGNITION"]["PROVIDERS"].keys():
if config["PHONE_ACTIVITY_RECOGNITION"]["PROVIDERS"][provider]["COMPUTE"]: if config["PHONE_ACTIVITY_RECOGNITION"]["PROVIDERS"][provider]["COMPUTE"]:
files_to_compute.extend(expand("data/raw/{pid}/phone_activity_recognition_raw.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/raw/{pid}/phone_activity_recognition_raw.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/raw/{pid}/phone_activity_recognition_with_datetime.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/raw/{pid}/phone_activity_recognition_with_datetime.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/raw/{pid}/phone_activity_recognition_with_datetime_unified.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/interim/{pid}/phone_activity_recognition_episodes.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/interim/{pid}/phone_activity_recognition_episodes.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/interim/{pid}/phone_activity_recognition_episodes_resampled.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/interim/{pid}/phone_activity_recognition_episodes_resampled.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/interim/{pid}/phone_activity_recognition_episodes_resampled_with_datetime.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/interim/{pid}/phone_activity_recognition_episodes_resampled_with_datetime.csv", pid=config["PIDS"]))

View File

@ -18,7 +18,6 @@ Sensor parameters description for `[PHONE_ACTIVITY_RECOGNITION]`:
```bash ```bash
- data/raw/{pid}/phone_activity_recognition_raw.csv - data/raw/{pid}/phone_activity_recognition_raw.csv
- data/raw/{pid}/phone_activity_recognition_with_datetime.csv - data/raw/{pid}/phone_activity_recognition_with_datetime.csv
- data/raw/{pid}/phone_activity_recognition_with_datetime_unified.csv
- data/interim/{pid}/phone_activity_recognition_episodes.csv - data/interim/{pid}/phone_activity_recognition_episodes.csv
- data/interim/{pid}/phone_activity_recognition_episodes_resampled.csv - data/interim/{pid}/phone_activity_recognition_episodes_resampled.csv
- data/interim/{pid}/phone_activity_recognition_episodes_resampled_with_datetime.csv - data/interim/{pid}/phone_activity_recognition_episodes_resampled_with_datetime.csv

View File

@ -55,7 +55,6 @@ for provider in config["PHONE_ACTIVITY_RECOGNITION"]["PROVIDERS"].keys():
if config["PHONE_ACTIVITY_RECOGNITION"]["PROVIDERS"][provider]["COMPUTE"]: if config["PHONE_ACTIVITY_RECOGNITION"]["PROVIDERS"][provider]["COMPUTE"]:
files_to_compute.extend(expand("data/raw/{pid}/phone_activity_recognition_raw.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/raw/{pid}/phone_activity_recognition_raw.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/raw/{pid}/phone_activity_recognition_with_datetime.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/raw/{pid}/phone_activity_recognition_with_datetime.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/raw/{pid}/phone_activity_recognition_with_datetime_unified.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/interim/{pid}/phone_activity_recognition_episodes.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/interim/{pid}/phone_activity_recognition_episodes.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/interim/{pid}/phone_activity_recognition_episodes_resampled.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/interim/{pid}/phone_activity_recognition_episodes_resampled.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/interim/{pid}/phone_activity_recognition_episodes_resampled_with_datetime.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/interim/{pid}/phone_activity_recognition_episodes_resampled_with_datetime.csv", pid=config["PIDS"]))

View File

@ -62,9 +62,9 @@ rule phone_accelerometer_r_features:
rule activity_recognition_episodes: rule activity_recognition_episodes:
input: input:
sensor_data = "data/raw/{pid}/phone_activity_recognition_with_datetime_unified.csv" sensor_data = "data/raw/{pid}/phone_activity_recognition_with_datetime.csv"
params: params:
episode_threshold_between_rows = config["PHONE_BATTERY"]["EPISODE_THRESHOLD_BETWEEN_ROWS"] episode_threshold_between_rows = config["PHONE_ACTIVITY_RECOGNITION"]["EPISODE_THRESHOLD_BETWEEN_ROWS"]
output: output:
"data/interim/{pid}/phone_activity_recognition_episodes.csv" "data/interim/{pid}/phone_activity_recognition_episodes.csv"
script: script:

View File

@ -16,3 +16,23 @@ PHONE_ACCELEROMETER:
DOUBLE_VALUES_2: double_values_2 DOUBLE_VALUES_2: double_values_2
MUTATION_SCRIPTS: # List any python or r scripts that mutate your raw data MUTATION_SCRIPTS: # List any python or r scripts that mutate your raw data
PHONE_ACTIVITY_RECOGNITION:
ANDROID:
COLUMN_MAPPINGS:
TIMESTAMP: timestamp
DEVICE_ID: device_id
ACTIVITY_TYPE: activity_type
ACTIVITY_NAME: activity_name
CONFIDENCE: confidence
MUTATION_SCRIPTS: # List any python or r scripts that mutate your raw data
IOS:
COLUMN_MAPPINGS:
TIMESTAMP: timestamp
DEVICE_ID: device_id
ACTIVITY_TYPE: FLAG_TO_MUTATE
ACTIVITY_NAME: FLAG_TO_MUTATE
CONFIDENCE: confidence
FLAG_AS_EXTRA: activities
MUTATION_SCRIPTS:
- "src/data/streams/mutations/phone/aware/activity_recogniton_ios_unification.R"

View File

@ -0,0 +1,58 @@
source("renv/activate.R")
library("dplyr", warn.conflicts = F)
library(stringr)
#' Reduce the raw iOS `activities` column to a single activity label per row.
#'
#' Removes every double quote and square bracket from `activities`, verifies
#' that the only comma-separated combination present is one whose resolution
#' is implemented, and rewrites "stationary,automotive" as "automotive".
#'
#' @param ios_gar Data frame (or tibble) with an `activities` column.
#' @return `ios_gar` with `activities` cleaned; other columns are untouched.
#' @details Stops with ONE error message that lists every unknown combination
#'   when at least one comma-separated value other than
#'   "stationary,automotive" is found.
clean_ios_activity_column <- function(ios_gar) {
  # Strip quotes and brackets, e.g. '["walking"]' becomes 'walking'.
  ios_gar$activities <- gsub('("|\\[|\\])', "", ios_gar$activities)

  # A comma left after cleaning means several activities were reported at
  # once. grepl() yields FALSE for NA, so missing values are ignored here.
  has_several <- grepl(",", ios_gar$activities, fixed = TRUE)
  observed_combinations <- sort(unique(ios_gar$activities[has_several]))

  known_combinations <- c("stationary,automotive")
  unknown_combinations <- setdiff(observed_combinations, known_combinations)

  if (length(unknown_combinations) > 0) {
    # Collapse first: handing stop() one string per combination would repeat
    # the whole prefix for each of them. "; " is the separator because the
    # combinations themselves contain commas.
    stop(paste0(
      "There are unkwown combinations of ios activities, you need to implement the decision of the ones to keep: ",
      paste(unknown_combinations, collapse = "; ")
    ))
  }

  # Implemented decision: when both are reported, keep "automotive".
  ios_gar$activities <- gsub(
    "stationary,automotive", "automotive", ios_gar$activities,
    fixed = TRUE
  )
  ios_gar
}
#' Make iOS activity recognition rows compatible with the Android columns.
#'
#' Drops rows whose `activities` value is blank, cleans the column with
#' `clean_ios_activity_column()`, derives `activity_name` and `activity_type`
#' from it, rescales `confidence` (0 -> 0, 1 -> 50, 2 -> 100) and finally
#' removes the `activities` column.
#'
#' @param ios_gar Data frame (or tibble) with at least the `activities` and
#'   `confidence` columns.
#' @return The transformed data without the `activities` column. Values not
#'   matched by a `case_when()` branch become `NA`.
unify_ios_activity_recognition <- function(ios_gar) {
  # We only need to unify Google Activity Recognition data for iOS.
  # Discard rows where the activities column is blank. A logical mask is used
  # instead of `-which(...)`: when no row is blank, `which()` returns
  # integer(0) and the negative index would select ZERO rows, silently
  # dropping the whole data set. `%in%` never yields NA, so rows with a
  # missing `activities` value are kept, exactly as `which()` kept them.
  is_blank <- ios_gar$activities %in% ""
  ios_gar <- ios_gar[!is_blank, , drop = FALSE]

  # Clean the "activities" column of ios_gar
  ios_gar <- clean_ios_activity_column(ios_gar)

  # Make it compatible with the android version: generate the
  # "activity_name" and "activity_type" columns.
  # NOTE(review): activity_name has no branch for "unknown" although
  # activity_type maps it to 4, so those rows get NA as name. Confirm whether
  # that is intended.
  ios_gar <- ios_gar %>%
    mutate(
      activity_name = case_when(
        activities == "automotive" ~ "in_vehicle",
        activities == "cycling" ~ "on_bicycle",
        activities == "walking" ~ "walking",
        activities == "running" ~ "running",
        activities == "stationary" ~ "still"
      ),
      activity_type = case_when(
        activities == "automotive" ~ 0,
        activities == "cycling" ~ 1,
        activities == "walking" ~ 7,
        activities == "running" ~ 8,
        activities == "stationary" ~ 3,
        activities == "unknown" ~ 4
      ),
      confidence = case_when(
        confidence == 0 ~ 0,
        confidence == 1 ~ 50,
        confidence == 2 ~ 100
      )
    ) %>%
    select(-activities)

  ios_gar
}
#' Entry point of this mutation script.
#'
#' @param data Raw iOS activity recognition data.
#' @return Whatever `unify_ios_activity_recognition()` returns for `data`.
main <- function(data) {
  unified <- unify_ios_activity_recognition(data)
  unified
}

View File

@ -4,3 +4,11 @@ PHONE_ACCELEROMETER:
- DOUBLE_VALUES_0 - DOUBLE_VALUES_0
- DOUBLE_VALUES_1 - DOUBLE_VALUES_1
- DOUBLE_VALUES_2 - DOUBLE_VALUES_2
PHONE_ACTIVITY_RECOGNITION:
- TIMESTAMP
- DEVICE_ID
- ACTIVITY_TYPE
- ACTIVITY_NAME
- CONFIDENCE

View File

@ -63,7 +63,6 @@ for provider in config["PHONE_ACTIVITY_RECOGNITION"]["PROVIDERS"].keys():
if config["PHONE_ACTIVITY_RECOGNITION"]["PROVIDERS"][provider]["COMPUTE"]: if config["PHONE_ACTIVITY_RECOGNITION"]["PROVIDERS"][provider]["COMPUTE"]:
files_to_compute.extend(expand("data/raw/{pid}/phone_activity_recognition_raw.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/raw/{pid}/phone_activity_recognition_raw.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/raw/{pid}/phone_activity_recognition_with_datetime.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/raw/{pid}/phone_activity_recognition_with_datetime.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/raw/{pid}/phone_activity_recognition_with_datetime_unified.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/interim/{pid}/phone_activity_recognition_episodes.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/interim/{pid}/phone_activity_recognition_episodes.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/interim/{pid}/phone_activity_recognition_episodes_resampled.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/interim/{pid}/phone_activity_recognition_episodes_resampled.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/interim/{pid}/phone_activity_recognition_episodes_resampled_with_datetime.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/interim/{pid}/phone_activity_recognition_episodes_resampled_with_datetime.csv", pid=config["PIDS"]))