From d470eef27e09985d5a3dedc8369d772397fb0624 Mon Sep 17 00:00:00 2001
From: junos <junos.lukan@ijs.si>
Date: Wed, 9 Mar 2022 18:38:46 +0100
Subject: [PATCH 1/5] Add a rule to preprocess and clean ESM.

---
 Snakefile                                  |   4 +
 rules/preprocessing.smk                    |   9 +-
 src/features/phone_esm/straw/preprocess.py | 113 +++++++++++++++++++++
 3 files changed, 125 insertions(+), 1 deletion(-)
 create mode 100644 src/features/phone_esm/straw/preprocess.py

diff --git a/Snakefile b/Snakefile
index 95cfbc95..24cdb73e 100644
--- a/Snakefile
+++ b/Snakefile
@@ -167,6 +167,10 @@ for provider in config["PHONE_CONVERSATION"]["PROVIDERS"].keys():
 for provider in config["PHONE_ESM"]["PROVIDERS"].keys():
     if config["PHONE_ESM"]["PROVIDERS"][provider]["COMPUTE"]:
         files_to_compute.extend(expand("data/raw/{pid}/phone_esm_raw.csv",pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/raw/{pid}/phone_esm_with_datetime.csv",pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/interim/{pid}/phone_esm_clean.csv",pid=config["PIDS"]))
+        #files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv",pid=config["PIDS"]))
+        #files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
 
 # We can delete these if's as soon as we add feature PROVIDERS to any of these sensors
 if isinstance(config["PHONE_APPLICATIONS_CRASHES"]["PROVIDERS"], dict):
diff --git a/rules/preprocessing.smk b/rules/preprocessing.smk
index cd83b95b..154556f3 100644
--- a/rules/preprocessing.smk
+++ b/rules/preprocessing.smk
@@ -177,7 +177,6 @@ rule resample_episodes_with_datetime:
     script:
         "../src/data/datetime/readable_datetime.R"
 
-
 rule phone_application_categories:
     input:
         "data/raw/{pid}/phone_applications_{type}_with_datetime.csv"
@@ -191,6 +190,14 @@ rule phone_application_categories:
     script:
         "../src/data/application_categories.R"
 
+rule preprocess_esm:
+    input: "data/raw/{pid}/phone_esm_with_datetime.csv"
+    params:
+        questionnaire_ids = [8,9]
+    output: "data/interim/{pid}/phone_esm_clean.csv"
+    script:
+        "../src/features/phone_esm/straw/preprocess.py"
+
 rule pull_wearable_data:
     input: unpack(pull_wearable_data_input_with_mutation_scripts)
     params:
diff --git a/src/features/phone_esm/straw/preprocess.py b/src/features/phone_esm/straw/preprocess.py
new file mode 100644
index 00000000..7a38ecfe
--- /dev/null
+++ b/src/features/phone_esm/straw/preprocess.py
@@ -0,0 +1,113 @@
+import json
+import numpy as np
+import pandas as pd
+
+
+ESM_TYPE = {
+        "text": 1,
+        "radio": 2,
+        "checkbox": 3,
+        "likert": 4,
+        "quick_answers": 5,
+        "scale": 6,
+        "datetime": 7,
+        "pam": 8,
+        "number": 9,
+        "web": 10,
+        "date": 11,
+    }
+
+ESM_STATUS_ANSWERED = 2
+
+GROUP_SESSIONS_BY = ["participant_id", "device_id", "esm_session"]
+
+SESSION_STATUS_UNANSWERED = "ema_unanswered"
+SESSION_STATUS_DAY_FINISHED = "day_finished"
+SESSION_STATUS_COMPLETE = "ema_completed"
+
+ANSWER_DAY_FINISHED = "DayFinished3421"
+ANSWER_DAY_OFF = "DayOff3421"
+ANSWER_SET_EVENING = "DayFinishedSetEvening"
+
+MAX_MORNING_LENGTH = 3
+# When the participants was not yet at work at the time of the first (morning) EMA,
+# only three items were answered.
+# Two sleep related items and one indicating NOT starting work yet.
+# Daytime EMAs are all longer, in fact they always consist of at least 6 items.
+
+
+def preprocess_esm(df_esm: pd.DataFrame) -> pd.DataFrame:
+    """
+    Convert timestamps into human-readable datetimes and dates
+    and expand the JSON column into several Pandas DF columns.
+
+    Parameters
+    ----------
+    df_esm: pd.DataFrame
+        A dataframe of esm data.
+
+    Returns
+    -------
+    df_esm_preprocessed: pd.DataFrame
+        A dataframe with added columns: datetime in Ljubljana timezone and all fields from ESM_JSON column.
+    """
+    df_esm_json = df_esm["esm_json"].apply(json.loads)
+    df_esm_json = pd.json_normalize(df_esm_json).drop(
+        columns=["esm_trigger"]
+    )  # The esm_trigger column is already present in the main df.
+    return df_esm.join(df_esm_json)
+
+
+def clean_up_esm(df_esm_preprocessed: pd.DataFrame) -> pd.DataFrame:
+    """
+    This function eliminates invalid ESM responses.
+    It removes unanswered ESMs and those that indicate end of work and similar.
+    It also extracts a numeric answer from strings such as "4 - I strongly agree".
+
+    Parameters
+    ----------
+    df_esm_preprocessed: pd.DataFrame
+        A preprocessed dataframe of esm data.
+
+    Returns
+    -------
+    df_esm_clean: pd.DataFrame
+        A subset of the original dataframe.
+
+    """
+    df_esm_clean = df_esm_preprocessed[
+        df_esm_preprocessed["esm_status"] == ESM_STATUS_ANSWERED
+    ]
+    df_esm_clean = df_esm_clean[
+        ~df_esm_clean["esm_user_answer"].isin(
+            [ANSWER_DAY_FINISHED, ANSWER_DAY_OFF, ANSWER_SET_EVENING]
+        )
+    ]
+    df_esm_clean["esm_user_answer_numeric"] = np.nan
+    esm_type_numeric = [
+        ESM_TYPE.get("radio"),
+        ESM_TYPE.get("scale"),
+        ESM_TYPE.get("number"),
+    ]
+    df_esm_clean.loc[
+        df_esm_clean["esm_type"].isin(esm_type_numeric)
+    ] = df_esm_clean.loc[df_esm_clean["esm_type"].isin(esm_type_numeric)].assign(
+        esm_user_answer_numeric=lambda x: x.esm_user_answer.str.slice(stop=1).astype(
+            int
+        )
+    )
+    return df_esm_clean
+
+
+df_esm = pd.read_csv(snakemake.input[0])
+df_esm_preprocessed = preprocess_esm(df_esm)
+#TODO Enable getting the right questionnaire here.
+df_esm_PANAS = df_esm_preprocessed[
+    (df_esm_preprocessed["questionnaire_id"] == 8)
+    | (df_esm_preprocessed["questionnaire_id"] == 9)
+]
+df_esm_clean = clean_up_esm(df_esm_PANAS)
+
+df_esm_clean.to_csv(snakemake.output[0])
+
+

From 5f293211a7fecea8b9943001bc8820a7a8b9f86a Mon Sep 17 00:00:00 2001
From: junos <junos.lukan@ijs.si>
Date: Tue, 15 Mar 2022 13:28:51 +0100
Subject: [PATCH 2/5] Reformat.

---
 src/features/phone_esm/straw/preprocess.py | 30 ++++++++++------------
 1 file changed, 14 insertions(+), 16 deletions(-)

diff --git a/src/features/phone_esm/straw/preprocess.py b/src/features/phone_esm/straw/preprocess.py
index 7a38ecfe..d6279d9c 100644
--- a/src/features/phone_esm/straw/preprocess.py
+++ b/src/features/phone_esm/straw/preprocess.py
@@ -1,21 +1,21 @@
 import json
+
 import numpy as np
 import pandas as pd
 
-
 ESM_TYPE = {
-        "text": 1,
-        "radio": 2,
-        "checkbox": 3,
-        "likert": 4,
-        "quick_answers": 5,
-        "scale": 6,
-        "datetime": 7,
-        "pam": 8,
-        "number": 9,
-        "web": 10,
-        "date": 11,
-    }
+    "text": 1,
+    "radio": 2,
+    "checkbox": 3,
+    "likert": 4,
+    "quick_answers": 5,
+    "scale": 6,
+    "datetime": 7,
+    "pam": 8,
+    "number": 9,
+    "web": 10,
+    "date": 11,
+}
 
 ESM_STATUS_ANSWERED = 2
 
@@ -101,7 +101,7 @@ def clean_up_esm(df_esm_preprocessed: pd.DataFrame) -> pd.DataFrame:
 
 df_esm = pd.read_csv(snakemake.input[0])
 df_esm_preprocessed = preprocess_esm(df_esm)
-#TODO Enable getting the right questionnaire here.
+# TODO Enable getting the right questionnaire here.
 df_esm_PANAS = df_esm_preprocessed[
     (df_esm_preprocessed["questionnaire_id"] == 8)
     | (df_esm_preprocessed["questionnaire_id"] == 9)
@@ -109,5 +109,3 @@ df_esm_PANAS = df_esm_preprocessed[
 df_esm_clean = clean_up_esm(df_esm_PANAS)
 
 df_esm_clean.to_csv(snakemake.output[0])
-
-

From ef57103bac79faedfd498cfa56b5efb0dd608207 Mon Sep 17 00:00:00 2001
From: junos <junos.lukan@ijs.si>
Date: Tue, 15 Mar 2022 13:41:33 +0100
Subject: [PATCH 3/5] Add questionnaire ID key.

---
 config.yaml                                |  4 ++
 src/features/phone_esm/straw/preprocess.py | 52 ++++++++++++++++++++++
 2 files changed, 56 insertions(+)

diff --git a/config.yaml b/config.yaml
index 92578437..5c774a6e 100644
--- a/config.yaml
+++ b/config.yaml
@@ -645,3 +645,7 @@ PARAMS_FOR_ANALYSIS:
     QUESTION_LIST: survey637813+question_text.csv
     FEATURES: [age, gender, startlanguage, demand, control, demand_control_ratio]
     CATEGORICAL_FEATURES: [gender]
+
+  TARGET:
+    SCALE: [positive_affect, negative_affect]
+
diff --git a/src/features/phone_esm/straw/preprocess.py b/src/features/phone_esm/straw/preprocess.py
index d6279d9c..cd99d906 100644
--- a/src/features/phone_esm/straw/preprocess.py
+++ b/src/features/phone_esm/straw/preprocess.py
@@ -17,6 +17,58 @@ ESM_TYPE = {
     "date": 11,
 }
 
+QUESTIONNAIRE_IDS = {
+    "sleep_quality": 1,
+    "PANAS": {
+        "positive_affect": 8,
+        "negative_affect": 9
+    },
+    "job_content_questionnaire": {
+        "job_demand": 10,
+        "job_control": 11,
+        "supervisor_support": 12,
+        "coworker_support": 13,
+    },
+    "PFITS": {
+        "supervisor": 14,
+        "coworkers": 15
+    },
+    "UWES": {
+        "vigor": 16,
+        "dedication": 17,
+        "absorption": 18
+    },
+    "COPE": {
+        "active": 19,
+        "support": 20,
+        "emotions": 21
+    },
+    "work_life_balance": {
+        "life_work": 22,
+        "work_life": 23
+    },
+    "recovery_experience": {
+        "detachment": 24,
+        "relaxation": 25
+    },
+    "symptoms": 26,
+    "stress_appraisal": {
+        "stressfulness_event": 87,
+        "threat": 88,
+        "challenge": 89,
+        "event_time": 90,
+        "event_duration": 91,
+        "event_work_related": 92,
+        "stressfulness_period": 93,
+    },
+    "late_work": 94,
+    "work_hours": 95,
+    "left_work": 96,
+    "activities": 97,
+    "coffee_breaks": 98,
+    "at_work_yet": 99,
+}
+
 ESM_STATUS_ANSWERED = 2
 
 GROUP_SESSIONS_BY = ["participant_id", "device_id", "esm_session"]

From 19b9da0ba367b67e4359756370f0ed747cc36237 Mon Sep 17 00:00:00 2001
From: junos <junos.lukan@ijs.si>
Date: Wed, 16 Mar 2022 16:49:28 +0100
Subject: [PATCH 4/5] Separate function definitions from main.

---
 .../phone_esm/straw/esm_preprocess.py         | 151 +++++++++++++++++
 src/features/phone_esm/straw/preprocess.py    | 153 +-----------------
 2 files changed, 152 insertions(+), 152 deletions(-)
 create mode 100644 src/features/phone_esm/straw/esm_preprocess.py

diff --git a/src/features/phone_esm/straw/esm_preprocess.py b/src/features/phone_esm/straw/esm_preprocess.py
new file mode 100644
index 00000000..876be61e
--- /dev/null
+++ b/src/features/phone_esm/straw/esm_preprocess.py
@@ -0,0 +1,151 @@
+import json
+
+import numpy as np
+import pandas as pd
+
+ESM_TYPE = {
+    "text": 1,
+    "radio": 2,
+    "checkbox": 3,
+    "likert": 4,
+    "quick_answers": 5,
+    "scale": 6,
+    "datetime": 7,
+    "pam": 8,
+    "number": 9,
+    "web": 10,
+    "date": 11,
+}
+
+QUESTIONNAIRE_IDS = {
+    "sleep_quality": 1,
+    "PANAS": {
+        "positive_affect": 8,
+        "negative_affect": 9
+    },
+    "job_content_questionnaire": {
+        "job_demand": 10,
+        "job_control": 11,
+        "supervisor_support": 12,
+        "coworker_support": 13,
+    },
+    "PFITS": {
+        "supervisor": 14,
+        "coworkers": 15
+    },
+    "UWES": {
+        "vigor": 16,
+        "dedication": 17,
+        "absorption": 18
+    },
+    "COPE": {
+        "active": 19,
+        "support": 20,
+        "emotions": 21
+    },
+    "work_life_balance": {
+        "life_work": 22,
+        "work_life": 23
+    },
+    "recovery_experience": {
+        "detachment": 24,
+        "relaxation": 25
+    },
+    "symptoms": 26,
+    "stress_appraisal": {
+        "stressfulness_event": 87,
+        "threat": 88,
+        "challenge": 89,
+        "event_time": 90,
+        "event_duration": 91,
+        "event_work_related": 92,
+        "stressfulness_period": 93,
+    },
+    "late_work": 94,
+    "work_hours": 95,
+    "left_work": 96,
+    "activities": 97,
+    "coffee_breaks": 98,
+    "at_work_yet": 99,
+}
+
+ESM_STATUS_ANSWERED = 2
+
+GROUP_SESSIONS_BY = ["participant_id", "device_id", "esm_session"]
+
+SESSION_STATUS_UNANSWERED = "ema_unanswered"
+SESSION_STATUS_DAY_FINISHED = "day_finished"
+SESSION_STATUS_COMPLETE = "ema_completed"
+
+ANSWER_DAY_FINISHED = "DayFinished3421"
+ANSWER_DAY_OFF = "DayOff3421"
+ANSWER_SET_EVENING = "DayFinishedSetEvening"
+
+MAX_MORNING_LENGTH = 3
+# When the participant was not yet at work at the time of the first (morning) EMA,
+# only three items were answered.
+# Two sleep related items and one indicating NOT starting work yet.
+# Daytime EMAs are all longer, in fact they always consist of at least 6 items.
+
+
+def preprocess_esm(df_esm: pd.DataFrame) -> pd.DataFrame:
+    """
+    Expand the JSON strings in the esm_json column into separate columns.
+    No timestamp conversion is done here; existing columns are kept as they are.
+
+    Parameters
+    ----------
+    df_esm: pd.DataFrame
+        A dataframe of esm data.
+
+    Returns
+    -------
+    df_esm_preprocessed: pd.DataFrame
+        The input dataframe with all fields of esm_json, except esm_trigger, added as columns.
+    """
+    df_esm_json = df_esm["esm_json"].apply(json.loads)
+    df_esm_json = pd.json_normalize(df_esm_json).drop(
+        columns=["esm_trigger"]
+    )  # The esm_trigger column is already present in the main df.
+    return df_esm.join(df_esm_json)
+
+
+def clean_up_esm(df_esm_preprocessed: pd.DataFrame) -> pd.DataFrame:
+    """
+    Eliminate invalid ESM responses and extract numeric answers.
+
+    Keeps only rows with esm_status == ESM_STATUS_ANSWERED and drops the
+    day-finished, day-off and set-evening marker answers. For radio, scale
+    and number items, the leading integer of esm_user_answer
+    (e.g. 4 in "4 - I strongly agree") is stored in esm_user_answer_numeric.
+
+    Parameters
+    ----------
+    df_esm_preprocessed: pd.DataFrame
+        A preprocessed dataframe of esm data.
+
+    Returns
+    -------
+    df_esm_clean: pd.DataFrame
+        A filtered copy with the added float column esm_user_answer_numeric
+        (NaN for other ESM types and for answers without a leading integer).
+    """
+    is_answered = df_esm_preprocessed["esm_status"] == ESM_STATUS_ANSWERED
+    is_marker = df_esm_preprocessed["esm_user_answer"].isin(
+        [ANSWER_DAY_FINISHED, ANSWER_DAY_OFF, ANSWER_SET_EVENING]
+    )
+    # Copy explicitly so that adding a column never writes to a view of the input.
+    df_esm_clean = df_esm_preprocessed[is_answered & ~is_marker].copy()
+    esm_type_numeric = [ESM_TYPE["radio"], ESM_TYPE["scale"], ESM_TYPE["number"]]
+    is_numeric = df_esm_clean["esm_type"].isin(esm_type_numeric)
+    # Take all leading digits, not just the first character, so that
+    # multi-digit answers such as "10" are not truncated to 1.
+    leading_digits = (
+        df_esm_clean["esm_user_answer"]
+        .astype(str)
+        .str.extract(r"^\s*(\d+)", expand=False)
+    )
+    df_esm_clean["esm_user_answer_numeric"] = pd.to_numeric(
+        leading_digits.where(is_numeric), errors="coerce"
+    ).astype(float)
+    return df_esm_clean
diff --git a/src/features/phone_esm/straw/preprocess.py b/src/features/phone_esm/straw/preprocess.py
index cd99d906..a46d04ec 100644
--- a/src/features/phone_esm/straw/preprocess.py
+++ b/src/features/phone_esm/straw/preprocess.py
@@ -1,155 +1,4 @@
-import json
-
-import numpy as np
-import pandas as pd
-
-ESM_TYPE = {
-    "text": 1,
-    "radio": 2,
-    "checkbox": 3,
-    "likert": 4,
-    "quick_answers": 5,
-    "scale": 6,
-    "datetime": 7,
-    "pam": 8,
-    "number": 9,
-    "web": 10,
-    "date": 11,
-}
-
-QUESTIONNAIRE_IDS = {
-    "sleep_quality": 1,
-    "PANAS": {
-        "positive_affect": 8,
-        "negative_affect": 9
-    },
-    "job_content_questionnaire": {
-        "job_demand": 10,
-        "job_control": 11,
-        "supervisor_support": 12,
-        "coworker_support": 13,
-    },
-    "PFITS": {
-        "supervisor": 14,
-        "coworkers": 15
-    },
-    "UWES": {
-        "vigor": 16,
-        "dedication": 17,
-        "absorption": 18
-    },
-    "COPE": {
-        "active": 19,
-        "support": 20,
-        "emotions": 21
-    },
-    "work_life_balance": {
-        "life_work": 22,
-        "work_life": 23
-    },
-    "recovery_experience": {
-        "detachment": 24,
-        "relaxation": 25
-    },
-    "symptoms": 26,
-    "stress_appraisal": {
-        "stressfulness_event": 87,
-        "threat": 88,
-        "challenge": 89,
-        "event_time": 90,
-        "event_duration": 91,
-        "event_work_related": 92,
-        "stressfulness_period": 93,
-    },
-    "late_work": 94,
-    "work_hours": 95,
-    "left_work": 96,
-    "activities": 97,
-    "coffee_breaks": 98,
-    "at_work_yet": 99,
-}
-
-ESM_STATUS_ANSWERED = 2
-
-GROUP_SESSIONS_BY = ["participant_id", "device_id", "esm_session"]
-
-SESSION_STATUS_UNANSWERED = "ema_unanswered"
-SESSION_STATUS_DAY_FINISHED = "day_finished"
-SESSION_STATUS_COMPLETE = "ema_completed"
-
-ANSWER_DAY_FINISHED = "DayFinished3421"
-ANSWER_DAY_OFF = "DayOff3421"
-ANSWER_SET_EVENING = "DayFinishedSetEvening"
-
-MAX_MORNING_LENGTH = 3
-# When the participants was not yet at work at the time of the first (morning) EMA,
-# only three items were answered.
-# Two sleep related items and one indicating NOT starting work yet.
-# Daytime EMAs are all longer, in fact they always consist of at least 6 items.
-
-
-def preprocess_esm(df_esm: pd.DataFrame) -> pd.DataFrame:
-    """
-    Convert timestamps into human-readable datetimes and dates
-    and expand the JSON column into several Pandas DF columns.
-
-    Parameters
-    ----------
-    df_esm: pd.DataFrame
-        A dataframe of esm data.
-
-    Returns
-    -------
-    df_esm_preprocessed: pd.DataFrame
-        A dataframe with added columns: datetime in Ljubljana timezone and all fields from ESM_JSON column.
-    """
-    df_esm_json = df_esm["esm_json"].apply(json.loads)
-    df_esm_json = pd.json_normalize(df_esm_json).drop(
-        columns=["esm_trigger"]
-    )  # The esm_trigger column is already present in the main df.
-    return df_esm.join(df_esm_json)
-
-
-def clean_up_esm(df_esm_preprocessed: pd.DataFrame) -> pd.DataFrame:
-    """
-    This function eliminates invalid ESM responses.
-    It removes unanswered ESMs and those that indicate end of work and similar.
-    It also extracts a numeric answer from strings such as "4 - I strongly agree".
-
-    Parameters
-    ----------
-    df_esm_preprocessed: pd.DataFrame
-        A preprocessed dataframe of esm data.
-
-    Returns
-    -------
-    df_esm_clean: pd.DataFrame
-        A subset of the original dataframe.
-
-    """
-    df_esm_clean = df_esm_preprocessed[
-        df_esm_preprocessed["esm_status"] == ESM_STATUS_ANSWERED
-    ]
-    df_esm_clean = df_esm_clean[
-        ~df_esm_clean["esm_user_answer"].isin(
-            [ANSWER_DAY_FINISHED, ANSWER_DAY_OFF, ANSWER_SET_EVENING]
-        )
-    ]
-    df_esm_clean["esm_user_answer_numeric"] = np.nan
-    esm_type_numeric = [
-        ESM_TYPE.get("radio"),
-        ESM_TYPE.get("scale"),
-        ESM_TYPE.get("number"),
-    ]
-    df_esm_clean.loc[
-        df_esm_clean["esm_type"].isin(esm_type_numeric)
-    ] = df_esm_clean.loc[df_esm_clean["esm_type"].isin(esm_type_numeric)].assign(
-        esm_user_answer_numeric=lambda x: x.esm_user_answer.str.slice(stop=1).astype(
-            int
-        )
-    )
-    return df_esm_clean
-
+from esm_preprocess import *
 
 df_esm = pd.read_csv(snakemake.input[0])
 df_esm_preprocessed = preprocess_esm(df_esm)

From cb116100dd50b7a86b5aabfd7b25cc14dd3fc06d Mon Sep 17 00:00:00 2001
From: junos <junos.lukan@ijs.si>
Date: Wed, 16 Mar 2022 17:06:42 +0100
Subject: [PATCH 5/5] Move preprocessing to features.

---
 rules/features.smk      | 8 ++++++++
 rules/preprocessing.smk | 8 --------
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/rules/features.smk b/rules/features.smk
index f0fea945..defe843a 100644
--- a/rules/features.smk
+++ b/rules/features.smk
@@ -324,6 +324,14 @@ rule conversation_r_features:
     script:
         "../src/features/entry.R"
 
+rule preprocess_esm:
+    input: "data/raw/{pid}/phone_esm_with_datetime.csv"
+    params:
+        questionnaire_ids = [8,9]
+    output: "data/interim/{pid}/phone_esm_clean.csv"
+    script:
+        "../src/features/phone_esm/straw/preprocess.py"
+
 rule phone_keyboard_python_features:
     input:
         sensor_data = "data/raw/{pid}/phone_keyboard_with_datetime.csv",
diff --git a/rules/preprocessing.smk b/rules/preprocessing.smk
index 154556f3..83608204 100644
--- a/rules/preprocessing.smk
+++ b/rules/preprocessing.smk
@@ -190,14 +190,6 @@ rule phone_application_categories:
     script:
         "../src/data/application_categories.R"
 
-rule preprocess_esm:
-    input: "data/raw/{pid}/phone_esm_with_datetime.csv"
-    params:
-        questionnaire_ids = [8,9]
-    output: "data/interim/{pid}/phone_esm_clean.csv"
-    script:
-        "../src/features/phone_esm/straw/preprocess.py"
-
 rule pull_wearable_data:
     input: unpack(pull_wearable_data_input_with_mutation_scripts)
     params: