From 75b054d3580a8957b5e0d18c7ccf81049d73b8e3 Mon Sep 17 00:00:00 2001
From: Primoz <sisko.primoz@gmail.com>
Date: Tue, 17 Jan 2023 14:00:14 +0000
Subject: [PATCH] Integrate phone_speech into the RAPIDS pipeline.

---
 Snakefile                                     |  3 ++-
 rules/features.smk                            |  4 ++--
 src/data/streams/aware_postgresql/format.yaml | 17 +++++++++++++++++
 3 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/Snakefile b/Snakefile
index c591650a..b8631e4a 100644
--- a/Snakefile
+++ b/Snakefile
@@ -178,9 +178,10 @@ for provider in config["PHONE_SPEECH"]["PROVIDERS"].keys():
     if config["PHONE_SPEECH"]["PROVIDERS"][provider]["COMPUTE"]:
         files_to_compute.extend(expand("data/raw/{pid}/phone_speech_raw.csv",pid=config["PIDS"]))
         files_to_compute.extend(expand("data/raw/{pid}/phone_speech_with_datetime.csv",pid=config["PIDS"]))
-        files_to_compute.extend(expand("data/interim/{pid}/phone_speech_clean.csv",pid=config["PIDS"]))
         files_to_compute.extend(expand("data/interim/{pid}/phone_speech_features/phone_speech_{language}_{provider_key}.csv",pid=config["PIDS"],language=get_script_language(config["PHONE_SPEECH"]["PROVIDERS"][provider]["SRC_SCRIPT"]),provider_key=provider.lower()))
         files_to_compute.extend(expand("data/processed/features/{pid}/phone_speech.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
+        files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
 
 # We can delete these if's as soon as we add feature PROVIDERS to any of these sensors
 if isinstance(config["PHONE_APPLICATIONS_CRASHES"]["PROVIDERS"], dict):
diff --git a/rules/features.smk b/rules/features.smk
index 83d08568..e6a3b0bd 100644
--- a/rules/features.smk
+++ b/rules/features.smk
@@ -347,12 +347,12 @@ rule esm_features:
 
 rule phone_speech_python_features:
     input:
-        sensor_data = "data/interim/{pid}/phone_speech_with_datetime.csv",
+        sensor_data = "data/raw/{pid}/phone_speech_with_datetime.csv",
         time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"
     params:
         provider = lambda wildcards: config["PHONE_SPEECH"]["PROVIDERS"][wildcards.provider_key.upper()],
         provider_key = "{provider_key}",
-        sensor_key = "phone_speech",
+        sensor_key = "phone_speech"
     output: 
         "data/interim/{pid}/phone_speech_features/phone_speech_python_{provider_key}.csv"
     script:
diff --git a/src/data/streams/aware_postgresql/format.yaml b/src/data/streams/aware_postgresql/format.yaml
index f4cf20cd..ebdd6062 100644
--- a/src/data/streams/aware_postgresql/format.yaml
+++ b/src/data/streams/aware_postgresql/format.yaml
@@ -349,3 +349,20 @@ PHONE_WIFI_VISIBLE:
       COLUMN_MAPPINGS:
       SCRIPTS: # List any python or r scripts that mutate your raw data
 
+PHONE_SPEECH:
+  ANDROID:
+    RAPIDS_COLUMN_MAPPINGS:
+      TIMESTAMP: timestamp
+      DEVICE_ID: device_id
+      SPEECH_PROPORTION: speech_proportion
+    MUTATION:
+      COLUMN_MAPPINGS:
+      SCRIPTS: # List any python or r scripts that mutate your raw data
+  IOS:
+    RAPIDS_COLUMN_MAPPINGS:
+      TIMESTAMP: timestamp
+      DEVICE_ID: device_id
+      SPEECH_PROPORTION: speech_proportion
+    MUTATION:
+      COLUMN_MAPPINGS:
+      SCRIPTS: # List any python or r scripts that mutate your raw data