From 75b054d3580a8957b5e0d18c7ccf81049d73b8e3 Mon Sep 17 00:00:00 2001 From: Primoz Date: Tue, 17 Jan 2023 14:00:14 +0000 Subject: [PATCH] Integrate phone_speech into rapids pipeline. --- Snakefile | 3 ++- rules/features.smk | 4 ++-- src/data/streams/aware_postgresql/format.yaml | 21 +++++++++++++++++++ 3 files changed, 25 insertions(+), 3 deletions(-) diff --git a/Snakefile b/Snakefile index c591650a..b8631e4a 100644 --- a/Snakefile +++ b/Snakefile @@ -178,9 +178,10 @@ for provider in config["PHONE_SPEECH"]["PROVIDERS"].keys(): if config["PHONE_SPEECH"]["PROVIDERS"][provider]["COMPUTE"]: files_to_compute.extend(expand("data/raw/{pid}/phone_speech_raw.csv",pid=config["PIDS"])) files_to_compute.extend(expand("data/raw/{pid}/phone_speech_with_datetime.csv",pid=config["PIDS"])) - files_to_compute.extend(expand("data/interim/{pid}/phone_speech_clean.csv",pid=config["PIDS"])) files_to_compute.extend(expand("data/interim/{pid}/phone_speech_features/phone_speech_{language}_{provider_key}.csv",pid=config["PIDS"],language=get_script_language(config["PHONE_SPEECH"]["PROVIDERS"][provider]["SRC_SCRIPT"]),provider_key=provider.lower())) files_to_compute.extend(expand("data/processed/features/{pid}/phone_speech.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"])) + files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv") # We can delete these if's as soon as we add feature PROVIDERS to any of these sensors if isinstance(config["PHONE_APPLICATIONS_CRASHES"]["PROVIDERS"], dict): diff --git a/rules/features.smk b/rules/features.smk index 83d08568..e6a3b0bd 100644 --- a/rules/features.smk +++ b/rules/features.smk @@ -347,12 +347,12 @@ rule esm_features: rule phone_speech_python_features: input: - sensor_data = "data/interim/{pid}/phone_speech_with_datetime.csv", + sensor_data = "data/raw/{pid}/phone_speech_with_datetime.csv", time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv" params: provider = lambda wildcards: config["PHONE_SPEECH"]["PROVIDERS"][wildcards.provider_key.upper()], provider_key = "{provider_key}", - sensor_key = "phone_speech", + sensor_key = "phone_speech" output: "data/interim/{pid}/phone_speech_features/phone_speech_python_{provider_key}.csv" script: diff --git a/src/data/streams/aware_postgresql/format.yaml b/src/data/streams/aware_postgresql/format.yaml index f4cf20cd..ebdd6062 100644 --- a/src/data/streams/aware_postgresql/format.yaml +++ b/src/data/streams/aware_postgresql/format.yaml @@ -349,3 +349,24 @@ PHONE_WIFI_VISIBLE: COLUMN_MAPPINGS: SCRIPTS: # List any python or r scripts that mutate your raw data +PHONE_SPEECH: + ANDROID: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: timestamp + DEVICE_ID: device_id + SPEECH_PROPORTION: speech_proportion + MUTATION: + COLUMN_MAPPINGS: + SCRIPTS: # List any python or r scripts that mutate your raw data + IOS: + RAPIDS_COLUMN_MAPPINGS: + TIMESTAMP: timestamp + DEVICE_ID: device_id + SPEECH_PROPORTION: speech_proportion + MUTATION: + COLUMN_MAPPINGS: + SCRIPTS: # List any python or r scripts that mutate your raw data + + + +