Move phone_conversation to aware_mysql stream

pull/128/head
JulioV 2021-03-03 09:14:47 -05:00
parent ab1a3dbf79
commit 6c51c6c239
8 changed files with 51 additions and 6 deletions

View File

@ -151,7 +151,6 @@ for provider in config["PHONE_CONVERSATION"]["PROVIDERS"].keys():
if config["PHONE_CONVERSATION"]["PROVIDERS"][provider]["COMPUTE"]: if config["PHONE_CONVERSATION"]["PROVIDERS"][provider]["COMPUTE"]:
files_to_compute.extend(expand("data/raw/{pid}/phone_conversation_raw.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/raw/{pid}/phone_conversation_raw.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/raw/{pid}/phone_conversation_with_datetime.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/raw/{pid}/phone_conversation_with_datetime.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/raw/{pid}/phone_conversation_with_datetime_unified.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/interim/{pid}/phone_conversation_features/phone_conversation_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_CONVERSATION"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower())) files_to_compute.extend(expand("data/interim/{pid}/phone_conversation_features/phone_conversation_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_CONVERSATION"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
files_to_compute.extend(expand("data/processed/features/{pid}/phone_conversation.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/processed/features/{pid}/phone_conversation.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))

View File

@ -17,7 +17,6 @@ Sensor parameters description for `[PHONE_CONVERSATION]`:
```bash ```bash
- data/raw/{pid}/phone_conversation_raw.csv - data/raw/{pid}/phone_conversation_raw.csv
- data/raw/{pid}/phone_conversation_with_datetime.csv - data/raw/{pid}/phone_conversation_with_datetime.csv
- data/raw/{pid}/phone_conversation_with_datetime_unified.csv
- data/interim/{pid}/phone_conversation_features/phone_conversation_{language}_{provider_key}.csv - data/interim/{pid}/phone_conversation_features/phone_conversation_{language}_{provider_key}.csv
- data/processed/features/{pid}/phone_conversation.csv - data/processed/features/{pid}/phone_conversation.csv
``` ```

View File

@ -137,7 +137,6 @@ for provider in config["PHONE_CONVERSATION"]["PROVIDERS"].keys():
if config["PHONE_CONVERSATION"]["PROVIDERS"][provider]["COMPUTE"]: if config["PHONE_CONVERSATION"]["PROVIDERS"][provider]["COMPUTE"]:
files_to_compute.extend(expand("data/raw/{pid}/phone_conversation_raw.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/raw/{pid}/phone_conversation_raw.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/raw/{pid}/phone_conversation_with_datetime.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/raw/{pid}/phone_conversation_with_datetime.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/raw/{pid}/phone_conversation_with_datetime_unified.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/interim/{pid}/phone_conversation_features/phone_conversation_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_CONVERSATION"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower())) files_to_compute.extend(expand("data/interim/{pid}/phone_conversation_features/phone_conversation_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_CONVERSATION"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
files_to_compute.extend(expand("data/processed/features/{pid}/phone_conversation.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/processed/features/{pid}/phone_conversation.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))

View File

@ -290,7 +290,7 @@ rule calls_r_features:
rule conversation_python_features: rule conversation_python_features:
input: input:
sensor_data = "data/raw/{pid}/phone_conversation_with_datetime_unified.csv", sensor_data = "data/raw/{pid}/phone_conversation_with_datetime.csv",
time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv" time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"
params: params:
provider = lambda wildcards: config["PHONE_CONVERSATION"]["PROVIDERS"][wildcards.provider_key.upper()], provider = lambda wildcards: config["PHONE_CONVERSATION"]["PROVIDERS"][wildcards.provider_key.upper()],
@ -303,7 +303,7 @@ rule conversation_python_features:
rule conversation_r_features: rule conversation_r_features:
input: input:
sensor_data = "data/raw/{pid}/phone_conversation_with_datetime_unified.csv", sensor_data = "data/raw/{pid}/phone_conversation_with_datetime.csv",
time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv" time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"
params: params:
provider = lambda wildcards: config["PHONE_CONVERSATION"]["PROVIDERS"][wildcards.provider_key.upper()], provider = lambda wildcards: config["PHONE_CONVERSATION"]["PROVIDERS"][wildcards.provider_key.upper()],

View File

@ -36,3 +36,25 @@ PHONE_ACTIVITY_RECOGNITION:
MUTATION_SCRIPTS: MUTATION_SCRIPTS:
- "src/data/streams/mutations/phone/aware/activity_recogniton_ios_unification.R" - "src/data/streams/mutations/phone/aware/activity_recogniton_ios_unification.R"
PHONE_CONVERSATION:
ANDROID:
COLUMN_MAPPINGS:
TIMESTAMP: timestamp
DEVICE_ID: device_id
DATATYPE: datatype
DOUBLE_ENERGY: double_energy
INFERENCE: inference
DOUBLE_CONVO_START: double_convo_start
DOUBLE_CONVO_END: double_convo_end
MUTATION_SCRIPTS: # List any Python or R scripts that mutate your raw data
IOS:
COLUMN_MAPPINGS:
TIMESTAMP: timestamp
DEVICE_ID: device_id
DATATYPE: datatype
DOUBLE_ENERGY: double_energy
INFERENCE: inference
DOUBLE_CONVO_START: double_convo_start
DOUBLE_CONVO_END: double_convo_end
MUTATION_SCRIPTS:
- "src/data/streams/mutations/phone/aware/conversation_ios_timestamp.R"

View File

@ -0,0 +1,19 @@
source("renv/activate.R")
library("dplyr", warn.conflicts = F)
unify_ios_conversation <- function(conversation){
  # Normalise conversation start/end times so that they are all in milliseconds.
  #
  # Args:
  #   conversation: data frame with numeric columns `double_convo_start` and
  #     `double_convo_end`.
  #
  # Returns:
  #   `conversation` with every start/end value that looks like seconds
  #   multiplied by 1000. Values already in milliseconds, NAs and zero-row
  #   inputs are returned unchanged.
  #
  # Raises:
  #   An error when a non-empty `conversation` lacks one of the two columns.
  #
  # The conversion is done per value instead of "if any value is in seconds,
  # scale every row": the all-or-nothing rule inflated values that were already
  # in milliseconds as soon as a single value at or below the threshold was
  # present (a 0 is enough, since 0 <= threshold), and it aborted on NA because
  # `if(NA > 0)` is not a valid condition.
  seconds_threshold <- 9999999999 # Values at or below this are treated as seconds instead of milliseconds
  time_columns <- c("double_convo_start", "double_convo_end")

  if(nrow(conversation) > 0){
    # Fail fast with a readable message rather than silently skipping a column
    missing_columns <- setdiff(time_columns, names(conversation))
    if(length(missing_columns) > 0)
      stop("conversation is missing column(s): ", paste(missing_columns, collapse = ", "))

    for(time_column in time_columns){
      values <- conversation[[time_column]]
      # NA comparisons yield NA; exclude them so they are left untouched
      in_seconds <- !is.na(values) & values <= seconds_threshold
      values[in_seconds] <- values[in_seconds] * 1000 # convert seconds to milliseconds
      conversation[[time_column]] <- values
    }
  }
  return(conversation)
}
main <- function(data){
  # Entry point of this script: hands `data` to `unify_ios_conversation` and
  # returns its result unchanged.
  # NOTE(review): presumably called by the pipeline for each script listed
  # under MUTATION_SCRIPTS - confirm against the caller.
  unified <- unify_ios_conversation(data)
  return(unified)
}

View File

@ -12,3 +12,11 @@ PHONE_ACTIVITY_RECOGNITION:
- ACTIVITY_NAME - ACTIVITY_NAME
- CONFIDENCE - CONFIDENCE
PHONE_CONVERSATION:
- TIMESTAMP
- DEVICE_ID
- DATATYPE
- DOUBLE_ENERGY
- INFERENCE
- DOUBLE_CONVO_START
- DOUBLE_CONVO_END

View File

@ -149,7 +149,6 @@ for provider in config["PHONE_CONVERSATION"]["PROVIDERS"].keys():
if config["PHONE_CONVERSATION"]["PROVIDERS"][provider]["COMPUTE"]: if config["PHONE_CONVERSATION"]["PROVIDERS"][provider]["COMPUTE"]:
files_to_compute.extend(expand("data/raw/{pid}/phone_conversation_raw.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/raw/{pid}/phone_conversation_raw.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/raw/{pid}/phone_conversation_with_datetime.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/raw/{pid}/phone_conversation_with_datetime.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/raw/{pid}/phone_conversation_with_datetime_unified.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/interim/{pid}/phone_conversation_features/phone_conversation_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_CONVERSATION"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower())) files_to_compute.extend(expand("data/interim/{pid}/phone_conversation_features/phone_conversation_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_CONVERSATION"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
files_to_compute.extend(expand("data/processed/features/{pid}/phone_conversation.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/processed/features/{pid}/phone_conversation.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))